import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import torch
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
# Data download and preprocessing
DOWNLOAD_MNIST = True # If already download , set as False
train_data = torchvision.datasets.MNIST(
root ='./ mnist /',
train =True , # this is training data
# transform = torchvision . transforms . ToTensor () ,
download = DOWNLOAD_MNIST ,)
test_data = torchvision . datasets . MNIST ( root ='./ mnist /', train = False )
# change the features to numpy
X_train = train_data.train_data.numpy()
X_test = test_data.test_data.numpy()
# change the labels to numpy
Y_train = train_data.train_labels.numpy()
Y_test = test_data.test_labels.numpy()
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:75: UserWarning: train_data has been renamed data
warnings.warn("train_data has been renamed data")
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:80: UserWarning: test_data has been renamed data
warnings.warn("test_data has been renamed data")
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:65: UserWarning: train_labels has been renamed targets
warnings.warn("train_labels has been renamed targets")
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:70: UserWarning: test_labels has been renamed targets
warnings.warn("test_labels has been renamed targets")
fig, ax = plt.subplots(1,1)
plottable_image = np.reshape(X_train[0], (28, 28))
ax.imshow(plottable_image, cmap='gray_r')
print('The label in Y_train is', Y_train[0])
The label in Y_train is 5
X_train.shape
(60000, 28, 28)
X_test.shape
(10000, 28, 28)
def NormalizeData(data):
return (data - np.min(data)) / (np.max(data) - np.min(data))
x_test_normal = NormalizeData(X_test)
x_train_normal = NormalizeData(X_train)
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
# Label Encoding
labelencoder = LabelEncoder()
Y_train_label = labelencoder.fit_transform(np.array(Y_train))
# One-Hot Encoder
enc = OneHotEncoder(sparse = False,handle_unknown='ignore')
Y_train_encode = enc.fit_transform(Y_train_label.reshape(-1,1))
print(Y_train_encode)
[[0. 0. 0. ... 0. 0. 0.] [1. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] ... [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 1. 0.]]
Y_test_label = labelencoder.fit_transform(np.array(Y_test))
Y_test_encode = enc.fit_transform(Y_test_label.reshape(-1,1))
print(Y_test_encode)
[[0. 0. 0. ... 1. 0. 0.] [0. 0. 1. ... 0. 0. 0.] [0. 1. 0. ... 0. 0. 0.] ... [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.]]
One hot encoding makes our training data more useful and expressive, and it can be rescaled easily. By using numeric values, we more easily determine a probability for our values. In particular, one hot encoding is used for our output values, since it provides more nuanced predictions than single labels.
Determining the state has a low and constant cost of accessing one flip-flop
Changing the state has the constant cost of accessing two flip-flops
Easy to design and modify
Easy to detect illegal states
Using an one-hot encoding typically allows a state machine to run at a faster clock rate than any other encoding of that state machine
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.plotting import plot_decision_regions
trainsamples, nx_train, ny_train = x_train_normal.shape
X_train_data = x_train_normal.reshape((trainsamples,nx_train*ny_train))
testsample, nx_test,ny_test = x_test_normal.shape
X_test_data = x_test_normal.reshape((testsample,nx_test*ny_test))
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train_data,Y_train_encode)
KNeighborsClassifier(n_neighbors=10)
knn.score(X_test_data,Y_test_encode)
0.9558
print(f'Test error for KNN Alogrithm is {round(1-0.9558,4)*100}%')
Test error for KNN Alogrithm is 4.42%
Here, the test error for KNN is out perform than what we have in the report (5%)
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
treebase = DecisionTreeClassifier(criterion='entropy',max_depth=15)
ada_clf = AdaBoostClassifier(n_estimators=100,learning_rate=0.3, base_estimator=treebase)
ada_clf.fit(X_train_data,Y_train_label)
AdaBoostClassifier(base_estimator=DecisionTreeClassifier(criterion='entropy',
max_depth=15),
learning_rate=0.3, n_estimators=100)
ada_clf.score(X_test_data,Y_test_label)
0.9644
print(f'Test error for AdaBoost Alogrithm is {round((1-0.9659)*100,3)}%')
Test error for AdaBoost Alogrithm is 3.41%
from sklearn.svm import SVC
svm = SVC(kernel='rbf',cache_size=500)
svm.fit(X_train_data,Y_train_label)
SVC(cache_size=500)
svm.score(X_test_data,Y_test_label)
0.9792
print(f'Test error for SVM Alogrithm is {round((1-0.9799)*100,3)}%')
Test error for SVM Alogrithm is 2.01%
Here, the test error for SVM is higher than what we have in the report (1.4%). This happens may due to the choice of different random state that some train splits have a better performance than the others.
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier(bootstrap=True, max_depth=30,
max_features='sqrt', max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=5,
min_weight_fraction_leaf=0.0, n_estimators=1400,
n_jobs=None, oob_score=False, random_state=42, verbose=0,
warm_start=False)
rf_clf.fit(X_train_data,Y_train_label)
RandomForestClassifier(max_depth=30, max_features='sqrt', min_samples_split=5,
n_estimators=1400, random_state=42)
rf_clf.score(X_test_data,Y_test_label)
0.9717
print('test set accuracy(Random Forest): 0.9717')
test set accuracy(Random Forest): 0.9717
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
pipe1 = Pipeline([('pca',PCA(n_components=2)), # reduce dimensions
('lr',LogisticRegression(solver='saga',
max_iter=1000)), # classifier
])
X_trainnew =[]
X_testnew =[]
for i in range(len(x_train_normal)):
X_trainnew.append(x_train_normal[i].flatten())
for i in range(len(x_test_normal)):
X_testnew.append(x_test_normal[i].flatten())
# Set it as array
X_trainnew = np.array(X_trainnew)
X_testnew = np.array(X_testnew)
params1 = {'pca__n_components':[2,10,20,30,40,50],
'lr__penalty':['l1','l2'],
'lr__C':[.01,1,10,100]}
gscv = GridSearchCV(pipe1, params1, cv=3).fit(X_trainnew,Y_train_label)
gscv.best_params_
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
warnings.warn("The max_iter was reached which means "
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
warnings.warn("The max_iter was reached which means "
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
warnings.warn("The max_iter was reached which means "
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
warnings.warn("The max_iter was reached which means "
/Users/fuwang/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
warnings.warn("The max_iter was reached which means "
{'lr__C': 10, 'lr__penalty': 'l2', 'pca__n_components': 50}
score = gscv.score(X_testnew,Y_test_label)
print(f'test set accuracy(Logistic Regression): {score:0.3f}')
test set accuracy(Logistic Regression): 0.912
import tensorflow as tf
from collections import deque
from tensorflow.keras import Input
from tensorflow.keras import layers,Model
model = tf.keras.Sequential()
model.add(layers.Flatten(input_shape = [28,28]))
tf.keras.layers.Conv2D(16,(3,3),padding = 'same', activation = 'relu')
tf.keras.layers.Conv2D(32,(7,7),padding = 'same', activation = 'softmax')
model.add(tf.keras.layers.Dense(units = 250, activation = 'relu'))
model.add(tf.keras.layers.Dense(units = 250, activation = 'softmax'))
model.compile(optimizer = "Adam", loss = "sparse_categorical_crossentropy",metrics = ['accuracy'])
history = model.fit(x_train_normal,Y_train_label, epochs = 20,validation_data = 0)
model.evaluate(x_test_normal,Y_test_label)
Epoch 1/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.2705 - accuracy: 0.9258 Epoch 2/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.1028 - accuracy: 0.9702 Epoch 3/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0697 - accuracy: 0.9785 Epoch 4/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0510 - accuracy: 0.9842 Epoch 5/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0382 - accuracy: 0.9882 Epoch 6/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0282 - accuracy: 0.9909 Epoch 7/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0230 - accuracy: 0.9932 Epoch 8/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0182 - accuracy: 0.9943 Epoch 9/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0146 - accuracy: 0.9954 Epoch 10/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0115 - accuracy: 0.9966 Epoch 11/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0105 - accuracy: 0.9965 Epoch 12/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0089 - accuracy: 0.9973 Epoch 13/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0079 - accuracy: 0.9974 Epoch 14/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0072 - accuracy: 0.9978 Epoch 15/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0049 - accuracy: 0.9985 Epoch 16/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0081 - accuracy: 0.9972 Epoch 17/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0058 - accuracy: 0.9980 Epoch 18/20 1875/1875 [==============================] - 3s 2ms/step - loss: 0.0057 - accuracy: 0.9979 Epoch 19/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0053 - accuracy: 0.9981 Epoch 20/20 1875/1875 [==============================] - 4s 2ms/step - loss: 0.0053 - accuracy: 0.9982 313/313 [==============================] - 0s 914us/step - loss: 0.0996 - accuracy: 0.9808
[0.09960538148880005, 0.9807999730110168]
print(f'Test error for CNN Alogrithm is {round((1-0.9807000160217285)*100,3)}%')
Test error for CNN Alogrithm is 1.93%
Here, by using CNN Algotithm, the test error finally out perform all three of the classifier I implemented in part 2(a)
The first plot is the average training/test cross-entropy error vs number of epochs
The second plot is the classification error (in percentage) vs. number of epochs,
import torch
import torch.nn as nn
import torch.nn.functional as F # adds some efficiency
from torch.utils.data import DataLoader # lets us load data in batches
from torchvision import datasets, transforms
from matplotlib.pyplot import figure
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='../Data', train=True, download=True, transform=transform)
train_data
test_data = datasets.MNIST(root='../Data', train=False, download=True, transform=transform)
test_data
Dataset MNIST
Number of datapoints: 10000
Root location: ../Data
Split: Test
StandardTransform
Transform: ToTensor()
class Model(nn.Module):
def __init__(self, in_features = 784, h1 = 100, out_features = 10):
# one single layer neural network
super().__init__()
self.fc1 = nn.Linear(in_features,h1)
self.out = nn.Linear(h1,out_features)
def forward(self,x):
x = F.relu(self.fc1(x))
x = self.out(x)
return F.log_softmax(x, dim=1)
# initialize the model we defined and define the loss
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model
Model( (fc1): Linear(in_features=784, out_features=100, bias=True) (out): Linear(in_features=100, out_features=10, bias=True) )
torch.manual_seed(101) # for reproduce
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=500, shuffle=False)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here we flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.39356124 accuracy: 79.700% epoch: 0 batch: 400 [ 40000/60000] loss: 0.33373317 accuracy: 84.562% epoch: 0 batch: 600 [ 60000/60000] loss: 0.33077142 accuracy: 86.598% epoch: 30 batch: 200 [ 20000/60000] loss: 0.04866486 accuracy: 99.275% epoch: 30 batch: 400 [ 40000/60000] loss: 0.02032987 accuracy: 99.312% epoch: 30 batch: 600 [ 60000/60000] loss: 0.01567509 accuracy: 99.292% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00707829 accuracy: 99.910% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00752921 accuracy: 99.915% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00548673 accuracy: 99.922% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00654491 accuracy: 99.995% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00246160 accuracy: 99.995% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00059715 accuracy: 99.995% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00101315 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00057673 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00502245 accuracy: 100.000% Duration: 854 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 35 best test accuracy is: 98.02999877929688
# 2nd trainning
start_time = time.time()
torch.manual_seed(44)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.00142337 accuracy: 100.000% epoch: 0 batch: 400 [ 40000/60000] loss: 0.00238367 accuracy: 100.000% epoch: 0 batch: 600 [ 60000/60000] loss: 0.00128410 accuracy: 100.000% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00131633 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00242944 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00039310 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00183145 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00088600 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00136517 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00090845 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00091003 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00106497 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00103335 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00028356 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00005926 accuracy: 100.000% Duration: 859 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
# 3rd trainning
start_time = time.time()
torch.manual_seed(52)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here we flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.00083876 accuracy: 100.000% epoch: 0 batch: 400 [ 40000/60000] loss: 0.00108778 accuracy: 100.000% epoch: 0 batch: 600 [ 60000/60000] loss: 0.00057049 accuracy: 100.000% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00027540 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00073948 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00030312 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00029385 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00017869 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00011337 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00026005 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00027494 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00042380 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00062476 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00048714 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00018773 accuracy: 100.000% Duration: 847 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
# 4th trainning
start_time = time.time()
torch.manual_seed(22)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here we flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.00015524 accuracy: 100.000% epoch: 0 batch: 400 [ 40000/60000] loss: 0.00037434 accuracy: 100.000% epoch: 0 batch: 600 [ 60000/60000] loss: 0.00047704 accuracy: 100.000% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00004869 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00019146 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00025352 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00075640 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00017650 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00029315 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00025815 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00020435 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00039428 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00019074 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00026550 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00061750 accuracy: 100.000% Duration: 844 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
# 5th trainning
start_time = time.time()
torch.manual_seed(77)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here we flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.00032810 accuracy: 100.000% epoch: 0 batch: 400 [ 40000/60000] loss: 0.00021325 accuracy: 100.000% epoch: 0 batch: 600 [ 60000/60000] loss: 0.00039641 accuracy: 100.000% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00031812 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00024417 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00013397 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00018389 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00034384 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00011700 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00020042 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00009319 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00013690 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00018510 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00016446 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00020553 accuracy: 100.000% Duration: 878 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
# 6th trainning
start_time = time.time()
torch.manual_seed(71)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here we flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.00021223 accuracy: 100.000% epoch: 0 batch: 400 [ 40000/60000] loss: 0.00030008 accuracy: 100.000% epoch: 0 batch: 600 [ 60000/60000] loss: 0.00011667 accuracy: 100.000% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00009274 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00016770 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00026743 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00006439 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00037118 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00020274 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00018378 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00023930 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00012491 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00023352 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00018539 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00008256 accuracy: 100.000% Duration: 885 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
By comparing to those two different types of plots we can find out that for plots in (a), the validation loss starts to increase after a point. In my model at learning rate 0.1, it starts the increasing trend at the beginning of the 2nd trainning and loss gradually getting more and more during the rest training.
For the plots in (b), we will find out that the misclassification error (in percentage) eventually converged to a certain percentage, in this model it's around 2%, and it does not change much as we keep doing more trainnings
As a result, this model obtain its best prediction in around the 4th trainning.
from sklearn.metrics import confusion_matrix
from torchvision.utils import make_grid
# Extract the data all at once, not in batches
test_load_all = DataLoader(test_data, batch_size=10000, shuffle=False)
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = model(X_test.view(len(X_test), -1)) # pass in a flattened view of X_test
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9797/10000 = 97.970%
# print a row of values for reference
np.set_printoptions(formatter=dict(int=lambda x: f'{x:4}'))
print(np.arange(10).reshape(1,10))
print()
# print the confusion matrix
print(confusion_matrix(predicted.view(-1), y_test.view(-1)))
[[ 0 1 2 3 4 5 6 7 8 9]] [[ 971 0 4 0 1 3 4 1 5 0] [ 0 1124 2 0 0 0 2 2 1 3] [ 1 3 1009 4 3 0 0 7 2 0] [ 1 1 1 989 0 7 1 4 7 3] [ 1 0 2 0 965 2 5 0 3 7] [ 0 0 0 4 0 867 2 0 4 3] [ 2 1 3 0 3 5 943 0 4 1] [ 1 1 4 3 2 1 1 1004 2 3] [ 2 5 6 3 2 5 0 4 943 7] [ 1 0 1 7 6 2 0 6 3 982]]
right_guess = np.array([])
for i in range(len(predicted.view(-1))):
if predicted[i] == y_test[i]:
right_guess = np.append(right_guess,i).astype('int64')
# Display the number of corrected guess
len(right_guess)
9797
# Set up an iterator to feed batched rows
r = 100 # row size
row = iter(np.array_split(right_guess,len(right_guess)//r+1))
nextrow = next(row)
# print("Index:", nextrow)
print("Label:", y_test.index_select(0,torch.tensor(nextrow)).numpy())
print("Guess:", predicted.index_select(0,torch.tensor(nextrow)).numpy())
images = X_test.index_select(0,torch.tensor(nextrow))
im = make_grid(images, nrow=10)
plt.figure(figsize=(30,12))
plt.imshow(np.transpose(im.numpy(), (1, 2, 0)));
Label: [ 7 2 1 0 4 1 4 9 5 9 0 6 9 0
1 5 9 7 3 4 9 6 6 5 4 0 7 4
0 1 3 1 3 4 7 2 7 1 2 1 1 7
4 2 3 5 1 2 4 4 6 3 5 5 6 0
4 1 9 5 7 8 9 3 7 4 6 4 3 0
7 0 2 9 1 7 3 2 9 7 7 6 2 7
8 4 7 3 6 1 3 6 9 3 1 4 1 7
6 9]
Guess: [ 7 2 1 0 4 1 4 9 5 9 0 6 9 0
1 5 9 7 3 4 9 6 6 5 4 0 7 4
0 1 3 1 3 4 7 2 7 1 2 1 1 7
4 2 3 5 1 2 4 4 6 3 5 5 6 0
4 1 9 5 7 8 9 3 7 4 6 4 3 0
7 0 2 9 1 7 3 2 9 7 7 6 2 7
8 4 7 3 6 1 3 6 9 3 1 4 1 7
6 9]
weights = []
for wei in model.parameters():
weights.append(wei.detach().cpu().numpy())
weights[0].shape
(100, 784)
weights[0].reshape(10,10,28,28).shape
(10, 10, 28, 28)
# Assemble to images.
fc_images = weights[0].reshape(10, 10, 28, 28)
pad_images = np.zeros((10, 10, 32, 32))
pad_images[:, :, 2:30, 2:30] = fc_images
fc_vis = np.transpose(pad_images, (0, 2, 1, 3)).reshape(320, 320)
# Show weights.
plt.figure(figsize=(12, 12))
plt.imshow(fc_vis, cmap='gray')
plt.title("Learned W for single layers.")
plt.axis("off")
plt.show()
For computational efficiency, I will only test those parameters invidually and remains the other parameters the same as what I did in part a&b instead of testing all the combinations.
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
torch.manual_seed(101) # for reproduce
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=500, shuffle=False)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 1.84344923 accuracy: 46.445% epoch: 0 batch: 400 [ 40000/60000] loss: 1.29715204 accuracy: 60.015% epoch: 0 batch: 600 [ 60000/60000] loss: 0.95308661 accuracy: 66.705% epoch: 30 batch: 200 [ 20000/60000] loss: 0.18246056 accuracy: 94.595% epoch: 30 batch: 400 [ 40000/60000] loss: 0.12887622 accuracy: 94.655% epoch: 30 batch: 600 [ 60000/60000] loss: 0.20161596 accuracy: 94.653% epoch: 60 batch: 200 [ 20000/60000] loss: 0.14518161 accuracy: 96.380% epoch: 60 batch: 400 [ 40000/60000] loss: 0.17732511 accuracy: 96.460% epoch: 60 batch: 600 [ 60000/60000] loss: 0.12826741 accuracy: 96.465% epoch: 90 batch: 200 [ 20000/60000] loss: 0.12456366 accuracy: 97.395% epoch: 90 batch: 400 [ 40000/60000] loss: 0.05908205 accuracy: 97.457% epoch: 90 batch: 600 [ 60000/60000] loss: 0.05060227 accuracy: 97.407% epoch: 120 batch: 200 [ 20000/60000] loss: 0.05129230 accuracy: 98.070% epoch: 120 batch: 400 [ 40000/60000] loss: 0.02773865 accuracy: 97.993% epoch: 120 batch: 600 [ 60000/60000] loss: 0.12297091 accuracy: 98.002% Duration: 868 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
145
test_cc[max_index]
tensor(97.4600)
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.2)
torch.manual_seed(101) # for reproduce
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=500, shuffle=False)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.33184668 accuracy: 83.310% epoch: 0 batch: 400 [ 40000/60000] loss: 0.26827747 accuracy: 86.910% epoch: 0 batch: 600 [ 60000/60000] loss: 0.25720170 accuracy: 88.675% epoch: 30 batch: 200 [ 20000/60000] loss: 0.01170894 accuracy: 99.910% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00960306 accuracy: 99.905% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00358533 accuracy: 99.905% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00224586 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00261511 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00239754 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00242839 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00090159 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00031360 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00029939 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00031279 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00118260 accuracy: 100.000% Duration: 859 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
test_cc[max_index]
46
test_cc[max_index]
tensor(98.0600)
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.28557912 accuracy: 84.925% epoch: 0 batch: 400 [ 40000/60000] loss: 0.16596991 accuracy: 88.882% epoch: 0 batch: 600 [ 60000/60000] loss: 0.14686930 accuracy: 90.790% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00130168 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00146324 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00114082 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00050415 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00048773 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00030979 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00044649 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00018102 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00011979 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00014493 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00009779 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00041777 accuracy: 100.000% Duration: 858 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
19
test_cc[max_index]
tensor(98.0200)
Which is the same as what we did in part a&b
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1,momentum = 0.0)
torch.manual_seed(101) # for reproduce
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=500, shuffle=False)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.38246700 accuracy: 79.740% epoch: 0 batch: 400 [ 40000/60000] loss: 0.33883560 accuracy: 84.558% epoch: 0 batch: 600 [ 60000/60000] loss: 0.32703426 accuracy: 86.600% epoch: 30 batch: 200 [ 20000/60000] loss: 0.03754072 accuracy: 99.545% epoch: 30 batch: 400 [ 40000/60000] loss: 0.01524021 accuracy: 99.530% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00761934 accuracy: 99.503% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00364678 accuracy: 99.970% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00740200 accuracy: 99.970% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00483748 accuracy: 99.967% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00558591 accuracy: 99.995% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00181210 accuracy: 99.998% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00049784 accuracy: 99.997% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00116861 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00055369 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00354827 accuracy: 100.000% Duration: 899 seconds
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
95
test_cc[max_index]
tensor(97.9500)
test_cc[max_index]
tensor(97.9500)
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)
torch.manual_seed(101) # for reproduce
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=500, shuffle=False)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.33728120 accuracy: 83.720% epoch: 0 batch: 400 [ 40000/60000] loss: 0.26278970 accuracy: 87.200% epoch: 0 batch: 600 [ 60000/60000] loss: 0.24586096 accuracy: 88.880% epoch: 30 batch: 200 [ 20000/60000] loss: 0.01231473 accuracy: 99.920% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00870520 accuracy: 99.910% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00567897 accuracy: 99.902% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00174549 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00175070 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00153420 accuracy: 99.998% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00198455 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00094433 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00039620 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00052787 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00034777 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00147643 accuracy: 100.000% Duration: 890 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
126
test_cc[max_index]
tensor(98.)
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
torch.manual_seed(101) # for reproduce
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(test_data, batch_size=500, shuffle=False)
# 1st trainning
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train.view(100, -1)) # Here to flatten X_train
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0 and i%30 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = model(X_test.view(500, -1)) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.22042771 accuracy: 86.535% epoch: 0 batch: 400 [ 40000/60000] loss: 0.13824029 accuracy: 90.177% epoch: 0 batch: 600 [ 60000/60000] loss: 0.22732435 accuracy: 91.877% epoch: 30 batch: 200 [ 20000/60000] loss: 0.00040831 accuracy: 100.000% epoch: 30 batch: 400 [ 40000/60000] loss: 0.00031916 accuracy: 100.000% epoch: 30 batch: 600 [ 60000/60000] loss: 0.00007860 accuracy: 100.000% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00014098 accuracy: 100.000% epoch: 60 batch: 400 [ 40000/60000] loss: 0.00018104 accuracy: 100.000% epoch: 60 batch: 600 [ 60000/60000] loss: 0.00007145 accuracy: 100.000% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00017235 accuracy: 100.000% epoch: 90 batch: 400 [ 40000/60000] loss: 0.00007656 accuracy: 100.000% epoch: 90 batch: 600 [ 60000/60000] loss: 0.00004981 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00004457 accuracy: 100.000% epoch: 120 batch: 400 [ 40000/60000] loss: 0.00003208 accuracy: 100.000% epoch: 120 batch: 600 [ 60000/60000] loss: 0.00011608 accuracy: 100.000% Duration: 902 seconds
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(8, 8), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
figure(figsize=(8, 8), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
18
test_cc[max_index]
tensor(98.2200)
Since CNN works with immages, we would like to have relatively small batches
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)
class ConvolutionalNetwork(nn.Module):
def __init__(self):
# one 2-D convolutional layers
super().__init__()
self.conv1 = nn.Conv2d(1, 16, 3, 1)
self.out = nn.Linear(13*13*16,10)
def forward(self, X):
# one 2-D convolutional layers -> Relu activation -> Maxpooling
X = F.relu(self.conv1(X))
X = F.max_pool2d(X, 2, 2)
X = X.view(-1,13*13*16)
X = self.out(X)
return F.log_softmax(X, dim=1)
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1)) (out): Linear(in_features=2704, out_features=10, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.1)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%6000 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 6000 [ 60000/60000] loss: 0.03005195 accuracy: 95.060% epoch: 30 batch: 6000 [ 60000/60000] loss: 0.00009294 accuracy: 99.967% epoch: 60 batch: 6000 [ 60000/60000] loss: 0.00000144 accuracy: 100.000% Duration: 1671 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
# Extract the data all at once, not in batches
test_load_all = DataLoader(test_data, batch_size=10000, shuffle=False)
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9837/10000 = 98.370%
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 33 best test accuracy is: 98.51000213623047
As we can see that CNN converges faster than ANN and so there is no need to use that many epochs to train the data.
weights = []
for wei in cnn_model.parameters():
weights.append(wei.detach().cpu().numpy())
weights[2].shape
(10, 2704)
# Assemble to images.
fc_images = weights[2].reshape(10, 4, 26, 26)
pad_images = np.zeros((10, 4, 30, 30))
pad_images[:, :, 4:30, 4:30] = fc_images
fc_vis = np.transpose(pad_images, (0, 2, 1, 3)).reshape(300, 120)
# Show weights.
figure(figsize=(15, 20), dpi=80)
plt.figure(figsize=(12, 12))
plt.imshow(fc_vis, cmap='gray')
plt.title("Learned W for single layers.")
plt.axis("off")
plt.show()
<Figure size 1200x1600 with 0 Axes>
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.01)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 0.50055379 accuracy: 77.100% epoch: 0 batch: 1200 [ 12000/60000] loss: 0.23785670 accuracy: 82.625% epoch: 0 batch: 1800 [ 18000/60000] loss: 0.58887845 accuracy: 84.589% epoch: 0 batch: 2400 [ 24000/60000] loss: 0.46157807 accuracy: 85.796% epoch: 0 batch: 3000 [ 30000/60000] loss: 1.07298303 accuracy: 86.640% epoch: 0 batch: 3600 [ 36000/60000] loss: 0.55626756 accuracy: 87.231% epoch: 0 batch: 4200 [ 42000/60000] loss: 0.18289649 accuracy: 87.736% epoch: 0 batch: 4800 [ 48000/60000] loss: 0.55979478 accuracy: 88.100% epoch: 0 batch: 5400 [ 54000/60000] loss: 0.33777782 accuracy: 88.496% epoch: 0 batch: 6000 [ 60000/60000] loss: 0.19239928 accuracy: 88.852% epoch: 30 batch: 600 [ 6000/60000] loss: 0.00091206 accuracy: 99.083% epoch: 30 batch: 1200 [ 12000/60000] loss: 0.04468038 accuracy: 99.125% epoch: 30 batch: 1800 [ 18000/60000] loss: 0.09834365 accuracy: 99.133% epoch: 30 batch: 2400 [ 24000/60000] loss: 0.02118845 accuracy: 99.096% epoch: 30 batch: 3000 [ 30000/60000] loss: 0.00617437 accuracy: 99.140% epoch: 30 batch: 3600 [ 36000/60000] loss: 0.01184299 accuracy: 99.153% epoch: 30 batch: 4200 [ 42000/60000] loss: 0.08602176 accuracy: 99.129% epoch: 30 batch: 4800 [ 48000/60000] loss: 0.00307850 accuracy: 99.148% epoch: 30 batch: 5400 [ 54000/60000] loss: 0.00642804 accuracy: 99.163% epoch: 30 batch: 6000 [ 60000/60000] loss: 0.01421716 accuracy: 99.162% epoch: 60 batch: 600 [ 6000/60000] loss: 0.00008791 accuracy: 99.650% epoch: 60 batch: 1200 [ 12000/60000] loss: 0.00747563 accuracy: 99.667% epoch: 60 batch: 1800 [ 18000/60000] loss: 0.00424720 accuracy: 99.694% epoch: 60 batch: 2400 [ 24000/60000] loss: 0.00013221 accuracy: 99.683% epoch: 60 batch: 3000 [ 30000/60000] loss: 0.00062624 accuracy: 99.667% epoch: 60 batch: 3600 [ 36000/60000] loss: 0.01200708 accuracy: 99.672% epoch: 60 batch: 4200 [ 42000/60000] loss: 0.00073420 accuracy: 99.674% epoch: 60 batch: 4800 [ 48000/60000] loss: 0.00017492 accuracy: 99.660% epoch: 60 batch: 5400 [ 54000/60000] loss: 0.00232189 accuracy: 99.663% epoch: 60 batch: 6000 [ 60000/60000] loss: 0.00281465 accuracy: 99.665% Duration: 1378 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9809/10000 = 98.090%
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 28 best test accuracy is: 98.37000274658203
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.2)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 0.50748360 accuracy: 84.983% epoch: 0 batch: 1200 [ 12000/60000] loss: 0.19890423 accuracy: 88.975% epoch: 0 batch: 1800 [ 18000/60000] loss: 0.13588084 accuracy: 90.733% epoch: 0 batch: 2400 [ 24000/60000] loss: 0.65372181 accuracy: 91.600% epoch: 0 batch: 3000 [ 30000/60000] loss: 0.37344941 accuracy: 92.317% epoch: 0 batch: 3600 [ 36000/60000] loss: 0.24777575 accuracy: 92.747% epoch: 0 batch: 4200 [ 42000/60000] loss: 0.07369316 accuracy: 93.162% epoch: 0 batch: 4800 [ 48000/60000] loss: 0.75884545 accuracy: 93.463% epoch: 0 batch: 5400 [ 54000/60000] loss: 0.00106479 accuracy: 93.706% epoch: 0 batch: 6000 [ 60000/60000] loss: 0.06649792 accuracy: 93.942% epoch: 30 batch: 600 [ 6000/60000] loss: 0.00001135 accuracy: 99.283% epoch: 30 batch: 1200 [ 12000/60000] loss: 0.16929491 accuracy: 99.408% epoch: 30 batch: 1800 [ 18000/60000] loss: 0.00398919 accuracy: 99.361% epoch: 30 batch: 2400 [ 24000/60000] loss: 0.00005879 accuracy: 99.258% epoch: 30 batch: 3000 [ 30000/60000] loss: 0.00004606 accuracy: 99.267% epoch: 30 batch: 3600 [ 36000/60000] loss: 0.00011499 accuracy: 99.289% epoch: 30 batch: 4200 [ 42000/60000] loss: 0.00004142 accuracy: 99.255% epoch: 30 batch: 4800 [ 48000/60000] loss: 0.00000582 accuracy: 99.196% epoch: 30 batch: 5400 [ 54000/60000] loss: 0.00007318 accuracy: 99.159% epoch: 30 batch: 6000 [ 60000/60000] loss: 0.25528628 accuracy: 99.147% epoch: 60 batch: 600 [ 6000/60000] loss: 0.00000002 accuracy: 99.467% epoch: 60 batch: 1200 [ 12000/60000] loss: 0.00018255 accuracy: 99.417% epoch: 60 batch: 1800 [ 18000/60000] loss: 0.00000323 accuracy: 99.489% epoch: 60 batch: 2400 [ 24000/60000] loss: 0.00000000 accuracy: 99.513% epoch: 60 batch: 3000 [ 30000/60000] loss: 0.00000041 accuracy: 99.480% epoch: 60 batch: 3600 [ 36000/60000] loss: 0.09184793 accuracy: 99.461% epoch: 60 batch: 4200 [ 42000/60000] loss: 0.00095631 accuracy: 99.455% epoch: 60 batch: 4800 [ 48000/60000] loss: 0.00000000 accuracy: 99.458% epoch: 60 batch: 5400 [ 54000/60000] loss: 0.00000087 accuracy: 99.448% epoch: 60 batch: 6000 [ 60000/60000] loss: 0.00000011 accuracy: 99.417% Duration: 1396 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9736/10000 = 97.360%
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 12 best test accuracy is: 97.81999969482422
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.5)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 2.36434174 accuracy: 55.417% epoch: 0 batch: 1200 [ 12000/60000] loss: 2.30287886 accuracy: 33.033% epoch: 0 batch: 1800 [ 18000/60000] loss: 2.31059504 accuracy: 25.622% epoch: 0 batch: 2400 [ 24000/60000] loss: 2.26221704 accuracy: 21.800% epoch: 0 batch: 3000 [ 30000/60000] loss: 2.33690834 accuracy: 19.470% epoch: 0 batch: 3600 [ 36000/60000] loss: 2.30793858 accuracy: 17.975% epoch: 0 batch: 4200 [ 42000/60000] loss: 2.29981470 accuracy: 16.871% epoch: 0 batch: 4800 [ 48000/60000] loss: 2.28068233 accuracy: 16.044% epoch: 0 batch: 5400 [ 54000/60000] loss: 2.36272764 accuracy: 15.428% epoch: 0 batch: 6000 [ 60000/60000] loss: 2.33977842 accuracy: 14.873% epoch: 30 batch: 600 [ 6000/60000] loss: 2.26986527 accuracy: 11.050% epoch: 30 batch: 1200 [ 12000/60000] loss: 2.36418343 accuracy: 10.508% epoch: 30 batch: 1800 [ 18000/60000] loss: 2.34814286 accuracy: 10.539% epoch: 30 batch: 2400 [ 24000/60000] loss: 2.28387308 accuracy: 10.546% epoch: 30 batch: 3000 [ 30000/60000] loss: 2.30334210 accuracy: 10.517% epoch: 30 batch: 3600 [ 36000/60000] loss: 2.31064558 accuracy: 10.453% epoch: 30 batch: 4200 [ 42000/60000] loss: 2.28635597 accuracy: 10.483% epoch: 30 batch: 4800 [ 48000/60000] loss: 2.26440382 accuracy: 10.452% epoch: 30 batch: 5400 [ 54000/60000] loss: 2.27078319 accuracy: 10.476% epoch: 30 batch: 6000 [ 60000/60000] loss: 2.16125536 accuracy: 10.427% epoch: 60 batch: 600 [ 6000/60000] loss: 2.36254191 accuracy: 9.967% epoch: 60 batch: 1200 [ 12000/60000] loss: 2.30099297 accuracy: 9.833% epoch: 60 batch: 1800 [ 18000/60000] loss: 2.35656476 accuracy: 10.061% epoch: 60 batch: 2400 [ 24000/60000] loss: 2.35182571 accuracy: 10.183% epoch: 60 batch: 3000 [ 30000/60000] loss: 2.33413601 accuracy: 10.243% epoch: 60 batch: 3600 [ 36000/60000] loss: 2.31949043 accuracy: 10.194% epoch: 60 batch: 4200 [ 42000/60000] loss: 2.29400134 accuracy: 10.174% epoch: 60 batch: 4800 [ 48000/60000] loss: 2.34535074 accuracy: 10.252% epoch: 60 batch: 5400 [ 54000/60000] loss: 2.28369641 accuracy: 10.241% epoch: 60 batch: 6000 [ 60000/60000] loss: 2.32897258 accuracy: 10.310% Duration: 1543 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 0 best test accuracy is: 11.350000381469727
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.1,momentum=0.0)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 0.45184571 accuracy: 84.733% epoch: 0 batch: 1200 [ 12000/60000] loss: 0.27163389 accuracy: 89.283% epoch: 0 batch: 1800 [ 18000/60000] loss: 0.17342015 accuracy: 91.372% epoch: 0 batch: 2400 [ 24000/60000] loss: 0.28377706 accuracy: 92.450% epoch: 0 batch: 3000 [ 30000/60000] loss: 0.19070533 accuracy: 93.230% epoch: 0 batch: 3600 [ 36000/60000] loss: 0.08363827 accuracy: 93.739% epoch: 0 batch: 4200 [ 42000/60000] loss: 0.05873642 accuracy: 94.186% epoch: 0 batch: 4800 [ 48000/60000] loss: 0.32344499 accuracy: 94.540% epoch: 0 batch: 5400 [ 54000/60000] loss: 0.01723357 accuracy: 94.833% epoch: 0 batch: 6000 [ 60000/60000] loss: 0.03005195 accuracy: 95.060% epoch: 30 batch: 600 [ 6000/60000] loss: 0.00002879 accuracy: 100.000% epoch: 30 batch: 1200 [ 12000/60000] loss: 0.00096078 accuracy: 100.000% epoch: 30 batch: 1800 [ 18000/60000] loss: 0.00169025 accuracy: 99.989% epoch: 30 batch: 2400 [ 24000/60000] loss: 0.00003062 accuracy: 99.983% epoch: 30 batch: 3000 [ 30000/60000] loss: 0.00008538 accuracy: 99.980% epoch: 30 batch: 3600 [ 36000/60000] loss: 0.00002615 accuracy: 99.975% epoch: 30 batch: 4200 [ 42000/60000] loss: 0.00213923 accuracy: 99.976% epoch: 30 batch: 4800 [ 48000/60000] loss: 0.00005493 accuracy: 99.975% epoch: 30 batch: 5400 [ 54000/60000] loss: 0.00092084 accuracy: 99.969% epoch: 30 batch: 6000 [ 60000/60000] loss: 0.00009294 accuracy: 99.967% epoch: 60 batch: 600 [ 6000/60000] loss: 0.00000005 accuracy: 100.000% epoch: 60 batch: 1200 [ 12000/60000] loss: 0.00000802 accuracy: 100.000% epoch: 60 batch: 1800 [ 18000/60000] loss: 0.00000880 accuracy: 100.000% epoch: 60 batch: 2400 [ 24000/60000] loss: 0.00000179 accuracy: 100.000% epoch: 60 batch: 3000 [ 30000/60000] loss: 0.00000151 accuracy: 100.000% epoch: 60 batch: 3600 [ 36000/60000] loss: 0.00040442 accuracy: 100.000% epoch: 60 batch: 4200 [ 42000/60000] loss: 0.00003173 accuracy: 100.000% epoch: 60 batch: 4800 [ 48000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 60 batch: 5400 [ 54000/60000] loss: 0.00000440 accuracy: 100.000% epoch: 60 batch: 6000 [ 60000/60000] loss: 0.00000144 accuracy: 100.000% Duration: 1375 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9837/10000 = 98.370%
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 33 best test accuracy is: 98.51000213623047
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.1,momentum=0.5)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 0.38100421 accuracy: 87.233% epoch: 0 batch: 1200 [ 12000/60000] loss: 0.56988084 accuracy: 91.058% epoch: 0 batch: 1800 [ 18000/60000] loss: 0.17154136 accuracy: 92.706% epoch: 0 batch: 2400 [ 24000/60000] loss: 0.18195778 accuracy: 93.487% epoch: 0 batch: 3000 [ 30000/60000] loss: 0.28279927 accuracy: 94.087% epoch: 0 batch: 3600 [ 36000/60000] loss: 0.05191164 accuracy: 94.508% epoch: 0 batch: 4200 [ 42000/60000] loss: 0.03662794 accuracy: 94.869% epoch: 0 batch: 4800 [ 48000/60000] loss: 0.37893689 accuracy: 95.127% epoch: 0 batch: 5400 [ 54000/60000] loss: 0.01030162 accuracy: 95.359% epoch: 0 batch: 6000 [ 60000/60000] loss: 0.01470476 accuracy: 95.558% epoch: 30 batch: 600 [ 6000/60000] loss: 0.00000018 accuracy: 99.900% epoch: 30 batch: 1200 [ 12000/60000] loss: 0.00016474 accuracy: 99.908% epoch: 30 batch: 1800 [ 18000/60000] loss: 0.00033261 accuracy: 99.878% epoch: 30 batch: 2400 [ 24000/60000] loss: 0.00001888 accuracy: 99.838% epoch: 30 batch: 3000 [ 30000/60000] loss: 0.00002187 accuracy: 99.847% epoch: 30 batch: 3600 [ 36000/60000] loss: 0.00000058 accuracy: 99.856% epoch: 30 batch: 4200 [ 42000/60000] loss: 0.00848248 accuracy: 99.824% epoch: 30 batch: 4800 [ 48000/60000] loss: 0.00015804 accuracy: 99.810% epoch: 30 batch: 5400 [ 54000/60000] loss: 0.00000012 accuracy: 99.783% epoch: 30 batch: 6000 [ 60000/60000] loss: 0.00079844 accuracy: 99.765% epoch: 60 batch: 600 [ 6000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 60 batch: 1200 [ 12000/60000] loss: 0.00000020 accuracy: 100.000% epoch: 60 batch: 1800 [ 18000/60000] loss: 0.00000004 accuracy: 100.000% epoch: 60 batch: 2400 [ 24000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 60 batch: 3000 [ 30000/60000] loss: 0.00000004 accuracy: 100.000% epoch: 60 batch: 3600 [ 36000/60000] loss: 0.00003574 accuracy: 100.000% epoch: 60 batch: 4200 [ 42000/60000] loss: 0.00000066 accuracy: 100.000% epoch: 60 batch: 4800 [ 48000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 60 batch: 5400 [ 54000/60000] loss: 0.00000035 accuracy: 100.000% epoch: 60 batch: 6000 [ 60000/60000] loss: 0.00000000 accuracy: 100.000% Duration: 1445 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9830/10000 = 98.300%
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 83 best test accuracy is: 98.30000305175781
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(cnn_model.parameters(),lr=0.1,momentum=0.9)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 1.90247846 accuracy: 67.167% epoch: 0 batch: 1200 [ 12000/60000] loss: 0.92838877 accuracy: 71.808% epoch: 0 batch: 1800 [ 18000/60000] loss: 2.36757088 accuracy: 66.417% epoch: 0 batch: 2400 [ 24000/60000] loss: 2.21867037 accuracy: 52.371% epoch: 0 batch: 3000 [ 30000/60000] loss: 2.42204690 accuracy: 44.607% epoch: 0 batch: 3600 [ 36000/60000] loss: 2.29519200 accuracy: 38.914% epoch: 0 batch: 4200 [ 42000/60000] loss: 2.30479574 accuracy: 34.805% epoch: 0 batch: 4800 [ 48000/60000] loss: 2.29844713 accuracy: 31.644% epoch: 0 batch: 5400 [ 54000/60000] loss: 2.42215109 accuracy: 29.200% epoch: 0 batch: 6000 [ 60000/60000] loss: 2.37732625 accuracy: 27.283% epoch: 30 batch: 600 [ 6000/60000] loss: 2.26486731 accuracy: 11.283% epoch: 30 batch: 1200 [ 12000/60000] loss: 2.44942808 accuracy: 11.050% epoch: 30 batch: 1800 [ 18000/60000] loss: 2.38380122 accuracy: 10.778% epoch: 30 batch: 2400 [ 24000/60000] loss: 2.26300478 accuracy: 10.688% epoch: 30 batch: 3000 [ 30000/60000] loss: 2.31075883 accuracy: 10.560% epoch: 30 batch: 3600 [ 36000/60000] loss: 2.31996846 accuracy: 10.400% epoch: 30 batch: 4200 [ 42000/60000] loss: 2.31422019 accuracy: 10.357% epoch: 30 batch: 4800 [ 48000/60000] loss: 2.29717588 accuracy: 10.329% epoch: 30 batch: 5400 [ 54000/60000] loss: 2.24746466 accuracy: 10.387% epoch: 30 batch: 6000 [ 60000/60000] loss: 2.09660673 accuracy: 10.343% epoch: 60 batch: 600 [ 6000/60000] loss: 2.36210394 accuracy: 9.950% epoch: 60 batch: 1200 [ 12000/60000] loss: 2.45200300 accuracy: 9.883% epoch: 60 batch: 1800 [ 18000/60000] loss: 2.37003541 accuracy: 9.994% epoch: 60 batch: 2400 [ 24000/60000] loss: 2.34025693 accuracy: 10.129% epoch: 60 batch: 3000 [ 30000/60000] loss: 2.29944062 accuracy: 10.127% epoch: 60 batch: 3600 [ 36000/60000] loss: 2.36059070 accuracy: 10.150% epoch: 60 batch: 4200 [ 42000/60000] loss: 2.22132754 accuracy: 10.157% epoch: 60 batch: 4800 [ 48000/60000] loss: 2.35959172 accuracy: 10.294% epoch: 60 batch: 5400 [ 54000/60000] loss: 2.23351288 accuracy: 10.257% epoch: 60 batch: 6000 [ 60000/60000] loss: 2.38958621 accuracy: 10.295% Duration: 1417 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## Misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 1135/10000 = 11.350%
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 0 best test accuracy is: 11.350000381469727
As I'm using optim.Adam as my optimizer and therefore I will skip the test for momentums
train_loader = DataLoader(train_data, batch_size=20, shuffle=True)
test_loader = DataLoader(test_data, batch_size=20, shuffle=False)
## two 2-D convolutional layers,two fully connected hidden layers
class ConvolutionalNetwork(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 16, 3, 1)
self.conv2 = nn.Conv2d(16, 25, 3, 1)
self.fc1 = nn.Linear(5*5*25, 200)
self.fc2 = nn.Linear(200, 100)
self.out = nn.Linear(100,10)
def forward(self, X):
X = F.relu(self.conv1(X))
X = F.max_pool2d(X, 2, 2)
X = F.relu(self.conv2(X))
X = F.max_pool2d(X, 2, 2)
X = X.view(-1, 5*5*25)
X = F.relu(self.fc1(X))
X = F.relu(self.fc2(X))
X = self.out(X)
return F.log_softmax(X, dim=1)
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(16, 25, kernel_size=(3, 3), stride=(1, 1)) (fc1): Linear(in_features=625, out_features=200, bias=True) (fc2): Linear(in_features=200, out_features=100, bias=True) (out): Linear(in_features=100, out_features=10, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.011)
import time
start_time = time.time()
epochs = 90
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%6000 == 0 and i%30==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
Duration: 2093 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
# Extract the data all at once, not in batches
test_load_all = DataLoader(test_data, batch_size=10000, shuffle=False)
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9426/10000 = 94.260%
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)
for i, (X_train, y_train) in enumerate(train_data):
break
X = X_train.view(1,1,28,28)
print(x.shape)
torch.Size([1, 1, 28, 28])
conv1 = nn.Conv2d(1, 6, 3, 1)
conv2 = nn.Conv2d(6, 16, 3, 1)
fc1 = nn.Linear(5*5*16, 120)
fc2 = nn.Linear(120, 84)
fc3 = nn.Linear(84,10)
X = F.relu(conv1(X))
print(X.shape)
X = F.max_pool2d(X, 2, 2)
print(X.shape)
X = F.relu(conv2(X))
print(X.shape)
X = F.max_pool2d(X, 2, 2)
print(X.shape)
X = X.view(-1, 5*5*16)
print(X.shape)
X = F.relu(fc1(X))
print(X.shape)
X = F.relu(fc2(X))
print(X.shape)
X = fc3(X)
print(X.shape)
torch.Size([1, 6, 26, 26]) torch.Size([1, 6, 13, 13]) torch.Size([1, 16, 11, 11]) torch.Size([1, 16, 5, 5]) torch.Size([1, 400]) torch.Size([1, 120]) torch.Size([1, 84]) torch.Size([1, 10])
class ConvolutionalNetwork(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 3, 1)
self.conv2 = nn.Conv2d(6, 16, 3, 1)
self.fc1 = nn.Linear(5*5*16, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84,10)
def forward(self, X):
X = F.relu(self.conv1(X))
X = F.max_pool2d(X, 2, 2)
X = F.relu(self.conv2(X))
X = F.max_pool2d(X, 2, 2)
X = X.view(-1, 5*5*16)
X = F.relu(self.fc1(X))
X = F.relu(self.fc2(X))
X = self.fc3(X)
return F.log_softmax(X, dim=1)
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1)) (fc1): Linear(in_features=400, out_features=120, bias=True) (fc2): Linear(in_features=120, out_features=84, bias=True) (fc3): Linear(in_features=84, out_features=10, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001)
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 10
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train) # we don't flatten X-train here
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test) in enumerate(test_loader):
# Apply the model
y_val = cnn_model(X_test)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 0.08654555 accuracy: 77.800% epoch: 0 batch: 1200 [ 12000/60000] loss: 0.57689154 accuracy: 85.225% epoch: 0 batch: 1800 [ 18000/60000] loss: 0.19149570 accuracy: 88.356% epoch: 0 batch: 2400 [ 24000/60000] loss: 0.12121473 accuracy: 90.121% epoch: 0 batch: 3000 [ 30000/60000] loss: 0.17241535 accuracy: 91.283% epoch: 0 batch: 3600 [ 36000/60000] loss: 0.12094252 accuracy: 92.153% epoch: 0 batch: 4200 [ 42000/60000] loss: 0.06916973 accuracy: 92.800% epoch: 0 batch: 4800 [ 48000/60000] loss: 0.02384527 accuracy: 93.348% epoch: 0 batch: 5400 [ 54000/60000] loss: 0.26314718 accuracy: 93.781% epoch: 0 batch: 6000 [ 60000/60000] loss: 0.08190775 accuracy: 94.137% epoch: 1 batch: 600 [ 6000/60000] loss: 0.02405228 accuracy: 97.733% epoch: 1 batch: 1200 [ 12000/60000] loss: 0.21290512 accuracy: 97.775% epoch: 1 batch: 1800 [ 18000/60000] loss: 0.00250192 accuracy: 97.806% epoch: 1 batch: 2400 [ 24000/60000] loss: 0.01160681 accuracy: 97.787% epoch: 1 batch: 3000 [ 30000/60000] loss: 0.01782202 accuracy: 97.753% epoch: 1 batch: 3600 [ 36000/60000] loss: 0.00819663 accuracy: 97.833% epoch: 1 batch: 4200 [ 42000/60000] loss: 0.06549872 accuracy: 97.852% epoch: 1 batch: 4800 [ 48000/60000] loss: 0.04398922 accuracy: 97.894% epoch: 1 batch: 5400 [ 54000/60000] loss: 0.09052375 accuracy: 97.906% epoch: 1 batch: 6000 [ 60000/60000] loss: 0.00158944 accuracy: 97.935% epoch: 2 batch: 600 [ 6000/60000] loss: 0.01085181 accuracy: 98.367% epoch: 2 batch: 1200 [ 12000/60000] loss: 0.20432079 accuracy: 98.442% epoch: 2 batch: 1800 [ 18000/60000] loss: 0.01796024 accuracy: 98.317% epoch: 2 batch: 2400 [ 24000/60000] loss: 0.00309465 accuracy: 98.308% epoch: 2 batch: 3000 [ 30000/60000] loss: 0.00074634 accuracy: 98.317% epoch: 2 batch: 3600 [ 36000/60000] loss: 0.00047460 accuracy: 98.325% epoch: 2 batch: 4200 [ 42000/60000] loss: 0.00006942 accuracy: 98.312% epoch: 2 batch: 4800 [ 48000/60000] loss: 0.01472613 accuracy: 98.375% epoch: 2 batch: 5400 [ 54000/60000] loss: 0.21501955 accuracy: 98.389% epoch: 2 batch: 6000 [ 60000/60000] loss: 0.39474851 accuracy: 98.407% epoch: 3 batch: 600 [ 6000/60000] loss: 0.00642153 accuracy: 98.883% epoch: 3 batch: 1200 [ 12000/60000] loss: 0.00019928 accuracy: 98.883% epoch: 3 batch: 1800 [ 18000/60000] loss: 0.06450208 accuracy: 98.806% epoch: 3 batch: 2400 [ 24000/60000] loss: 0.00098193 accuracy: 98.775% epoch: 3 batch: 3000 [ 30000/60000] loss: 0.00263088 accuracy: 98.777% epoch: 3 batch: 3600 [ 36000/60000] loss: 0.01515727 accuracy: 98.789% epoch: 3 batch: 4200 [ 42000/60000] loss: 0.01780676 accuracy: 98.757% epoch: 3 batch: 4800 [ 48000/60000] loss: 0.01939029 accuracy: 98.804% epoch: 3 batch: 5400 [ 54000/60000] loss: 0.00455618 accuracy: 98.798% epoch: 3 batch: 6000 [ 60000/60000] loss: 0.10137463 accuracy: 98.798% epoch: 4 batch: 600 [ 6000/60000] loss: 0.00698963 accuracy: 99.050% epoch: 4 batch: 1200 [ 12000/60000] loss: 0.00411027 accuracy: 99.058% epoch: 4 batch: 1800 [ 18000/60000] loss: 0.01088401 accuracy: 99.039% epoch: 4 batch: 2400 [ 24000/60000] loss: 0.01409585 accuracy: 99.050% epoch: 4 batch: 3000 [ 30000/60000] loss: 0.01023414 accuracy: 99.037% epoch: 4 batch: 3600 [ 36000/60000] loss: 0.10339715 accuracy: 99.081% epoch: 4 batch: 4200 [ 42000/60000] loss: 0.00043669 accuracy: 99.048% epoch: 4 batch: 4800 [ 48000/60000] loss: 0.02437181 accuracy: 99.004% epoch: 4 batch: 5400 [ 54000/60000] loss: 0.00009954 accuracy: 99.031% epoch: 4 batch: 6000 [ 60000/60000] loss: 0.00095954 accuracy: 99.030% epoch: 5 batch: 600 [ 6000/60000] loss: 0.00316558 accuracy: 99.067% epoch: 5 batch: 1200 [ 12000/60000] loss: 0.00002075 accuracy: 99.042% epoch: 5 batch: 1800 [ 18000/60000] loss: 0.01208581 accuracy: 99.172% epoch: 5 batch: 2400 [ 24000/60000] loss: 0.00067585 accuracy: 99.200% epoch: 5 batch: 3000 [ 30000/60000] loss: 0.10693151 accuracy: 99.187% epoch: 5 batch: 3600 [ 36000/60000] loss: 0.00406165 accuracy: 99.197% epoch: 5 batch: 4200 [ 42000/60000] loss: 0.00524017 accuracy: 99.188% epoch: 5 batch: 4800 [ 48000/60000] loss: 0.00002334 accuracy: 99.181% epoch: 5 batch: 5400 [ 54000/60000] loss: 0.00514738 accuracy: 99.167% epoch: 5 batch: 6000 [ 60000/60000] loss: 0.00004658 accuracy: 99.147% epoch: 6 batch: 600 [ 6000/60000] loss: 0.00141256 accuracy: 99.500% epoch: 6 batch: 1200 [ 12000/60000] loss: 0.09045164 accuracy: 99.442% epoch: 6 batch: 1800 [ 18000/60000] loss: 0.06866559 accuracy: 99.422% epoch: 6 batch: 2400 [ 24000/60000] loss: 0.23120797 accuracy: 99.392% epoch: 6 batch: 3000 [ 30000/60000] loss: 0.00009449 accuracy: 99.380% epoch: 6 batch: 3600 [ 36000/60000] loss: 0.00328448 accuracy: 99.406% epoch: 6 batch: 4200 [ 42000/60000] loss: 0.00025906 accuracy: 99.355% epoch: 6 batch: 4800 [ 48000/60000] loss: 0.00003165 accuracy: 99.315% epoch: 6 batch: 5400 [ 54000/60000] loss: 0.00682546 accuracy: 99.306% epoch: 6 batch: 6000 [ 60000/60000] loss: 0.04335001 accuracy: 99.272% epoch: 7 batch: 600 [ 6000/60000] loss: 0.00155591 accuracy: 99.500% epoch: 7 batch: 1200 [ 12000/60000] loss: 0.00000558 accuracy: 99.375% epoch: 7 batch: 1800 [ 18000/60000] loss: 0.00018507 accuracy: 99.333% epoch: 7 batch: 2400 [ 24000/60000] loss: 0.00010379 accuracy: 99.383% epoch: 7 batch: 3000 [ 30000/60000] loss: 0.00018956 accuracy: 99.377% epoch: 7 batch: 3600 [ 36000/60000] loss: 0.01091889 accuracy: 99.389% epoch: 7 batch: 4200 [ 42000/60000] loss: 0.00007026 accuracy: 99.410% epoch: 7 batch: 4800 [ 48000/60000] loss: 0.00198325 accuracy: 99.408% epoch: 7 batch: 5400 [ 54000/60000] loss: 0.40131879 accuracy: 99.396% epoch: 7 batch: 6000 [ 60000/60000] loss: 0.00113922 accuracy: 99.397% epoch: 8 batch: 600 [ 6000/60000] loss: 0.00160872 accuracy: 99.450% epoch: 8 batch: 1200 [ 12000/60000] loss: 0.00004530 accuracy: 99.500% epoch: 8 batch: 1800 [ 18000/60000] loss: 0.00108822 accuracy: 99.506% epoch: 8 batch: 2400 [ 24000/60000] loss: 0.00000005 accuracy: 99.521% epoch: 8 batch: 3000 [ 30000/60000] loss: 0.00004404 accuracy: 99.453% epoch: 8 batch: 3600 [ 36000/60000] loss: 0.00000112 accuracy: 99.461% epoch: 8 batch: 4200 [ 42000/60000] loss: 0.00470434 accuracy: 99.460% epoch: 8 batch: 4800 [ 48000/60000] loss: 0.06064100 accuracy: 99.440% epoch: 8 batch: 5400 [ 54000/60000] loss: 0.06903899 accuracy: 99.430% epoch: 8 batch: 6000 [ 60000/60000] loss: 0.00067342 accuracy: 99.447% epoch: 9 batch: 600 [ 6000/60000] loss: 0.00004338 accuracy: 99.833% epoch: 9 batch: 1200 [ 12000/60000] loss: 0.00043262 accuracy: 99.675% epoch: 9 batch: 1800 [ 18000/60000] loss: 0.00024330 accuracy: 99.628% epoch: 9 batch: 2400 [ 24000/60000] loss: 0.00000008 accuracy: 99.567% epoch: 9 batch: 3000 [ 30000/60000] loss: 0.05822434 accuracy: 99.583% epoch: 9 batch: 3600 [ 36000/60000] loss: 0.00018357 accuracy: 99.544% epoch: 9 batch: 4200 [ 42000/60000] loss: 0.00044983 accuracy: 99.512% epoch: 9 batch: 4800 [ 48000/60000] loss: 0.00003675 accuracy: 99.515% epoch: 9 batch: 5400 [ 54000/60000] loss: 0.00026334 accuracy: 99.472% epoch: 9 batch: 6000 [ 60000/60000] loss: 0.00000148 accuracy: 99.477% Duration: 227 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([100-t/600 for t in train_correct], label='training misclassification error')
plt.plot([100-t/100 for t in test_correct], label='validation misclassification error')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
test_load_all = DataLoader(test_data, batch_size=10000, shuffle=False)
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test)
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(test_data)} = {correct.item()*100/(len(test_data)):7.3f}%')
Test accuracy: 9867/10000 = 98.670%
print(f'Test error: {round(100-98.67,3)}%, which is less than 1.4% we obtained from SVM with Gaussian Kernel' )
Test error: 1.33%, which is less than 1.4% we obtained from SVM with Gaussian Kernel
weights = []
for wei in cnn_model.parameters():
weights.append(wei.detach().cpu().numpy())
for wei in weights:
print(wei.shape)
(6, 1, 3, 3) (6,) (16, 6, 3, 3) (16,) (120, 400) (120,) (84, 120) (84,) (10, 84) (10,)
# Assemble to images.
fc_images = weights[4].reshape(10, 12, 20, 20)
pad_images = np.zeros((10, 12, 30, 30))
pad_images[:, :, 10:30, 10:30] = fc_images
fc_vis = np.transpose(pad_images, (0, 2, 1, 3)).reshape(300, 360)
# Show weights.
figure(figsize=(15, 20), dpi=80)
plt.figure(figsize=(12, 12))
plt.imshow(fc_vis, cmap='gray')
plt.title("Learned W for multiple layers.")
plt.axis("off")
plt.show()
<Figure size 1200x1600 with 0 Axes>
cnn_model.parameters()
<generator object Module.parameters at 0x7fcd7ebfecf0>
kernels = cnn_model.conv2.weight.detach()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1)) (fc1): Linear(in_features=400, out_features=120, bias=True) (fc2): Linear(in_features=120, out_features=84, bias=True) (fc3): Linear(in_features=84, out_features=10, bias=True) )
kernels = cnn_model.conv1.weight.detach()
fig, axarr = plt.subplots(1,kernels.size(0))
fig.set_size_inches(10,10)
for idx in range(kernels.size(0)):
axarr[idx].imshow(kernels[idx].squeeze())
from torchvision.utils import make_grid
kernels = cnn_model.conv1.weight.detach().clone()
kernels = kernels - kernels.min()
kernels = kernels / kernels.max()
img = make_grid(kernels)
plt.imshow(img.permute(1, 2, 0))
img.shape
torch.Size([3, 7, 32])
from torchvision.utils import make_grid
kernels = cnn_model.conv2.weight.detach().clone()
kernels = kernels - kernels.min()
kernels = kernels / kernels.max()
img = make_grid(kernels)
# plt.imshow(img.permute(1,2,0))
fig, axarr = plt.subplots(img.shape[0])
fig.set_size_inches(15, 16)
img.shape
axarr[0].imshow(img[0,:])
axarr[1].imshow(img[1,:])
axarr[2].imshow(img[2,:])
axarr[3].imshow(img[3,:])
axarr[4].imshow(img[4,:])
axarr[5].imshow(img[5,:])
<matplotlib.image.AxesImage at 0x7fca3aea4fa0>
As I'm using optim.Adam as my optimizer and therefore I will skip the test for momentums,and also I'm using the accuracy plot instead of using the misclassification plot
from sklearn.preprocessing import StandardScaler
train_q5 = pd.read_table('/Users/fuwang/Documents/Columbia University/Spring 2022/STAT 5241/Project/Final Project/train.txt',
sep=',',names=range(1569))
train_path = '/Users/fuwang/Documents/Columbia University/Spring 2022/STAT 5241/Project/Final Project/train.txt'
train_q5.shape
(20000, 1569)
np.unique(train_q5.iloc[:,1568].values)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18])
val_q5 = pd.read_table('/Users/fuwang/Documents/Columbia University/Spring 2022/STAT 5241/Project/Final Project/val.txt',
sep=',',names=range(1569))
val_path = '/Users/fuwang/Documents/Columbia University/Spring 2022/STAT 5241/Project/Final Project/val.txt'
val_q5.shape
(5000, 1569)
np.unique(val_q5.iloc[:,1568].values)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18])
test_q5 = pd.read_table('/Users/fuwang/Documents/Columbia University/Spring 2022/STAT 5241/Project/Final Project/test.txt',
sep=',',names=range(1569))
test_path = '/Users/fuwang/Documents/Columbia University/Spring 2022/STAT 5241/Project/Final Project/test.txt'
test_q5.shape
(5000, 1569)
np.unique(train_q5.iloc[:,1568:1569].values)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18])
The last coordinate is the label
class CustomerDataset(Dataset):
def __init__(self, file_path):
file_out = pd.read_table(file_path,sep=',',names=range(1569))
X = file_out.iloc[:,:1568].values
y = file_out.iloc[:,1568].values
# Standardlize y
sc = StandardScaler()
X = sc.fit_transform(X)
# convert to tensor
self.X = torch.tensor(X,dtype=torch.float32)
self.y = torch.tensor(y)
def __len__(self):
return len(self.y)
def __getitem__(self,idx):
return self.X[idx],self.y[idx]
train_q5 = CustomerDataset(train_path)
val_q5 = CustomerDataset(val_path)
test_q5 = CustomerDataset(test_path)
train_loader = DataLoader(train_q5, batch_size=100, shuffle=True)
val_loader = DataLoader(val_q5,batch_size=100, shuffle=True)
test_loader = DataLoader(test_q5, batch_size=500, shuffle=False)
for b, (X_train, y_train) in enumerate(train_loader):
break
y_train
tensor([ 6, 7, 6, 15, 2, 10, 9, 9, 16, 13, 9, 9, 7, 11, 4, 13, 7, 13,
3, 17, 9, 8, 12, 2, 7, 13, 17, 13, 9, 12, 10, 17, 13, 12, 10, 6,
12, 6, 5, 9, 10, 7, 5, 8, 10, 4, 11, 11, 8, 11, 1, 5, 8, 9,
12, 2, 8, 9, 12, 4, 11, 11, 12, 8, 15, 7, 1, 16, 6, 7, 6, 16,
10, 12, 7, 12, 13, 8, 17, 4, 5, 4, 2, 11, 13, 8, 5, 18, 11, 5,
10, 8, 16, 12, 9, 7, 8, 10, 8, 13])
class MultilayerPerceptron(nn.Module):
def __init__(self, in_sz=1568, out_sz=19, layers=[784,120,84]):
super().__init__()
self.fc1 = nn.Linear(in_sz,layers[0])
self.fc2 = nn.Linear(layers[0],layers[1])
self.fc3 = nn.Linear(layers[1],layers[2])
self.fc4 = nn.Linear(layers[2],out_sz)
def forward(self,X):
X = F.relu(self.fc1(X))
X = F.relu(self.fc2(X))
X = F.relu(self.fc3(X))
X = self.fc4(X)
return F.log_softmax(X, dim=1)
torch.manual_seed(42)
model = MultilayerPerceptron()
model
MultilayerPerceptron( (fc1): Linear(in_features=1568, out_features=784, bias=True) (fc2): Linear(in_features=784, out_features=120, bias=True) (fc3): Linear(in_features=120, out_features=84, bias=True) (fc4): Linear(in_features=84, out_features=19, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 1st training
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train)
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(test_loader):
# Apply the model
y_val = model(X_vali) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 1.25705862 accuracy: 38.160% epoch: 1 batch: 200 [ 20000/60000] loss: 0.62453997 accuracy: 71.855% epoch: 2 batch: 200 [ 20000/60000] loss: 0.74645686 accuracy: 82.770% epoch: 3 batch: 200 [ 20000/60000] loss: 0.41702273 accuracy: 88.175% epoch: 4 batch: 200 [ 20000/60000] loss: 0.33985245 accuracy: 92.365% epoch: 5 batch: 200 [ 20000/60000] loss: 0.19088845 accuracy: 94.565% epoch: 6 batch: 200 [ 20000/60000] loss: 0.18829584 accuracy: 96.140% epoch: 7 batch: 200 [ 20000/60000] loss: 0.08044560 accuracy: 97.745% epoch: 8 batch: 200 [ 20000/60000] loss: 0.07231690 accuracy: 97.510% epoch: 9 batch: 200 [ 20000/60000] loss: 0.27361739 accuracy: 95.670% epoch: 10 batch: 200 [ 20000/60000] loss: 0.12407091 accuracy: 96.360% epoch: 11 batch: 200 [ 20000/60000] loss: 0.08757715 accuracy: 97.060% epoch: 12 batch: 200 [ 20000/60000] loss: 0.07580175 accuracy: 97.715% epoch: 13 batch: 200 [ 20000/60000] loss: 0.07344008 accuracy: 98.050% epoch: 14 batch: 200 [ 20000/60000] loss: 0.29633653 accuracy: 98.175% epoch: 15 batch: 200 [ 20000/60000] loss: 0.02865674 accuracy: 98.510% epoch: 16 batch: 200 [ 20000/60000] loss: 0.17797227 accuracy: 97.845% epoch: 17 batch: 200 [ 20000/60000] loss: 0.06879742 accuracy: 97.180% epoch: 18 batch: 200 [ 20000/60000] loss: 0.08643096 accuracy: 97.295% epoch: 19 batch: 200 [ 20000/60000] loss: 0.02029056 accuracy: 98.315% epoch: 20 batch: 200 [ 20000/60000] loss: 0.00865414 accuracy: 98.970% epoch: 21 batch: 200 [ 20000/60000] loss: 0.03182900 accuracy: 98.985% epoch: 22 batch: 200 [ 20000/60000] loss: 0.03862184 accuracy: 98.435% epoch: 23 batch: 200 [ 20000/60000] loss: 0.04126069 accuracy: 98.330% epoch: 24 batch: 200 [ 20000/60000] loss: 0.02359360 accuracy: 98.375% epoch: 25 batch: 200 [ 20000/60000] loss: 0.16710466 accuracy: 98.855% epoch: 26 batch: 200 [ 20000/60000] loss: 0.12654404 accuracy: 98.535% epoch: 27 batch: 200 [ 20000/60000] loss: 0.01408652 accuracy: 98.155% epoch: 28 batch: 200 [ 20000/60000] loss: 0.02612695 accuracy: 98.675% epoch: 29 batch: 200 [ 20000/60000] loss: 0.04433857 accuracy: 98.515% epoch: 30 batch: 200 [ 20000/60000] loss: 0.11290598 accuracy: 98.440% epoch: 31 batch: 200 [ 20000/60000] loss: 0.01429099 accuracy: 98.785% epoch: 32 batch: 200 [ 20000/60000] loss: 0.11720677 accuracy: 98.985% epoch: 33 batch: 200 [ 20000/60000] loss: 0.08675665 accuracy: 98.965% epoch: 34 batch: 200 [ 20000/60000] loss: 0.00984627 accuracy: 98.570% epoch: 35 batch: 200 [ 20000/60000] loss: 0.05045847 accuracy: 98.435% epoch: 36 batch: 200 [ 20000/60000] loss: 0.09192266 accuracy: 98.505% epoch: 37 batch: 200 [ 20000/60000] loss: 0.05525253 accuracy: 98.595% epoch: 38 batch: 200 [ 20000/60000] loss: 0.04769710 accuracy: 99.080% epoch: 39 batch: 200 [ 20000/60000] loss: 0.00756064 accuracy: 99.270% epoch: 40 batch: 200 [ 20000/60000] loss: 0.04521766 accuracy: 98.745% epoch: 41 batch: 200 [ 20000/60000] loss: 0.02420994 accuracy: 99.005% epoch: 42 batch: 200 [ 20000/60000] loss: 0.19763561 accuracy: 99.240% epoch: 43 batch: 200 [ 20000/60000] loss: 0.00933103 accuracy: 99.350% epoch: 44 batch: 200 [ 20000/60000] loss: 0.01448879 accuracy: 98.715% epoch: 45 batch: 200 [ 20000/60000] loss: 0.00706874 accuracy: 98.905% epoch: 46 batch: 200 [ 20000/60000] loss: 0.00998105 accuracy: 99.075% epoch: 47 batch: 200 [ 20000/60000] loss: 0.02612734 accuracy: 99.505% epoch: 48 batch: 200 [ 20000/60000] loss: 0.06312460 accuracy: 98.855% epoch: 49 batch: 200 [ 20000/60000] loss: 0.05409915 accuracy: 98.055% epoch: 50 batch: 200 [ 20000/60000] loss: 0.01312007 accuracy: 98.415% epoch: 51 batch: 200 [ 20000/60000] loss: 0.04233754 accuracy: 99.200% epoch: 52 batch: 200 [ 20000/60000] loss: 0.01812300 accuracy: 99.140% epoch: 53 batch: 200 [ 20000/60000] loss: 0.00485015 accuracy: 99.215% epoch: 54 batch: 200 [ 20000/60000] loss: 0.16497712 accuracy: 99.190% epoch: 55 batch: 200 [ 20000/60000] loss: 0.00084036 accuracy: 99.340% epoch: 56 batch: 200 [ 20000/60000] loss: 0.00751175 accuracy: 99.015% epoch: 57 batch: 200 [ 20000/60000] loss: 0.00243141 accuracy: 98.760% epoch: 58 batch: 200 [ 20000/60000] loss: 0.06962513 accuracy: 98.870% epoch: 59 batch: 200 [ 20000/60000] loss: 0.00765433 accuracy: 99.190% epoch: 60 batch: 200 [ 20000/60000] loss: 0.00064846 accuracy: 99.395% epoch: 61 batch: 200 [ 20000/60000] loss: 0.03398927 accuracy: 99.410% epoch: 62 batch: 200 [ 20000/60000] loss: 0.00087600 accuracy: 99.375% epoch: 63 batch: 200 [ 20000/60000] loss: 0.01236061 accuracy: 99.070% epoch: 64 batch: 200 [ 20000/60000] loss: 0.00652742 accuracy: 99.150% epoch: 65 batch: 200 [ 20000/60000] loss: 0.04085486 accuracy: 98.555% epoch: 66 batch: 200 [ 20000/60000] loss: 0.00110008 accuracy: 99.085% epoch: 67 batch: 200 [ 20000/60000] loss: 0.10714024 accuracy: 98.980% epoch: 68 batch: 200 [ 20000/60000] loss: 0.00427342 accuracy: 98.535% epoch: 69 batch: 200 [ 20000/60000] loss: 0.00699804 accuracy: 98.750% epoch: 70 batch: 200 [ 20000/60000] loss: 0.00920460 accuracy: 99.615% epoch: 71 batch: 200 [ 20000/60000] loss: 0.00016062 accuracy: 99.810% epoch: 72 batch: 200 [ 20000/60000] loss: 0.07370105 accuracy: 99.755% epoch: 73 batch: 200 [ 20000/60000] loss: 0.01697337 accuracy: 98.960% epoch: 74 batch: 200 [ 20000/60000] loss: 0.14595452 accuracy: 98.515% epoch: 75 batch: 200 [ 20000/60000] loss: 0.00036533 accuracy: 99.085% epoch: 76 batch: 200 [ 20000/60000] loss: 0.01949938 accuracy: 99.560% epoch: 77 batch: 200 [ 20000/60000] loss: 0.01925514 accuracy: 99.590% epoch: 78 batch: 200 [ 20000/60000] loss: 0.06205363 accuracy: 99.000% epoch: 79 batch: 200 [ 20000/60000] loss: 0.06732173 accuracy: 99.220% epoch: 80 batch: 200 [ 20000/60000] loss: 0.00311567 accuracy: 99.460% epoch: 81 batch: 200 [ 20000/60000] loss: 0.00084015 accuracy: 99.470% epoch: 82 batch: 200 [ 20000/60000] loss: 0.15094917 accuracy: 99.295% epoch: 83 batch: 200 [ 20000/60000] loss: 0.05636629 accuracy: 98.690% epoch: 84 batch: 200 [ 20000/60000] loss: 0.02889506 accuracy: 98.270% epoch: 85 batch: 200 [ 20000/60000] loss: 0.01586589 accuracy: 98.510% epoch: 86 batch: 200 [ 20000/60000] loss: 0.00531425 accuracy: 99.425% epoch: 87 batch: 200 [ 20000/60000] loss: 0.00037418 accuracy: 99.680% epoch: 88 batch: 200 [ 20000/60000] loss: 0.00015998 accuracy: 99.925% epoch: 89 batch: 200 [ 20000/60000] loss: 0.00006058 accuracy: 99.915% epoch: 90 batch: 200 [ 20000/60000] loss: 0.09008190 accuracy: 99.805% epoch: 91 batch: 200 [ 20000/60000] loss: 0.05306465 accuracy: 99.305% epoch: 92 batch: 200 [ 20000/60000] loss: 0.03765136 accuracy: 98.675% epoch: 93 batch: 200 [ 20000/60000] loss: 0.00046837 accuracy: 99.040% epoch: 94 batch: 200 [ 20000/60000] loss: 0.00718095 accuracy: 99.380% epoch: 95 batch: 200 [ 20000/60000] loss: 0.01133446 accuracy: 99.305% epoch: 96 batch: 200 [ 20000/60000] loss: 0.04105724 accuracy: 99.030% epoch: 97 batch: 200 [ 20000/60000] loss: 0.05786487 accuracy: 98.930% epoch: 98 batch: 200 [ 20000/60000] loss: 0.00019380 accuracy: 99.185% epoch: 99 batch: 200 [ 20000/60000] loss: 0.00022632 accuracy: 99.620% epoch: 100 batch: 200 [ 20000/60000] loss: 0.08666576 accuracy: 99.790% epoch: 101 batch: 200 [ 20000/60000] loss: 0.00005962 accuracy: 99.930% epoch: 102 batch: 200 [ 20000/60000] loss: 0.00661197 accuracy: 99.140% epoch: 103 batch: 200 [ 20000/60000] loss: 0.01236901 accuracy: 98.990% epoch: 104 batch: 200 [ 20000/60000] loss: 0.00552401 accuracy: 98.620% epoch: 105 batch: 200 [ 20000/60000] loss: 0.04708955 accuracy: 99.090% epoch: 106 batch: 200 [ 20000/60000] loss: 0.00144274 accuracy: 99.390% epoch: 107 batch: 200 [ 20000/60000] loss: 0.04486023 accuracy: 99.650% epoch: 108 batch: 200 [ 20000/60000] loss: 0.00311539 accuracy: 99.720% epoch: 109 batch: 200 [ 20000/60000] loss: 0.00170598 accuracy: 99.800% epoch: 110 batch: 200 [ 20000/60000] loss: 0.01181535 accuracy: 99.720% epoch: 111 batch: 200 [ 20000/60000] loss: 0.08539143 accuracy: 99.555% epoch: 112 batch: 200 [ 20000/60000] loss: 0.00051447 accuracy: 98.840% epoch: 113 batch: 200 [ 20000/60000] loss: 0.05915627 accuracy: 99.150% epoch: 114 batch: 200 [ 20000/60000] loss: 0.00985572 accuracy: 99.050% epoch: 115 batch: 200 [ 20000/60000] loss: 0.00020214 accuracy: 99.355% epoch: 116 batch: 200 [ 20000/60000] loss: 0.02084630 accuracy: 99.545% epoch: 117 batch: 200 [ 20000/60000] loss: 0.00068072 accuracy: 99.450% epoch: 118 batch: 200 [ 20000/60000] loss: 0.01468053 accuracy: 99.290% epoch: 119 batch: 200 [ 20000/60000] loss: 0.00989334 accuracy: 99.540% epoch: 120 batch: 200 [ 20000/60000] loss: 0.01289255 accuracy: 99.545% epoch: 121 batch: 200 [ 20000/60000] loss: 0.00015939 accuracy: 99.500% epoch: 122 batch: 200 [ 20000/60000] loss: 0.05085357 accuracy: 99.040% epoch: 123 batch: 200 [ 20000/60000] loss: 0.00014960 accuracy: 98.725% epoch: 124 batch: 200 [ 20000/60000] loss: 0.00189603 accuracy: 99.405% epoch: 125 batch: 200 [ 20000/60000] loss: 0.00295733 accuracy: 99.625% epoch: 126 batch: 200 [ 20000/60000] loss: 0.00344029 accuracy: 99.215% epoch: 127 batch: 200 [ 20000/60000] loss: 0.03791691 accuracy: 99.480% epoch: 128 batch: 200 [ 20000/60000] loss: 0.01838722 accuracy: 99.165% epoch: 129 batch: 200 [ 20000/60000] loss: 0.00127676 accuracy: 99.380% epoch: 130 batch: 200 [ 20000/60000] loss: 0.00448255 accuracy: 99.720% epoch: 131 batch: 200 [ 20000/60000] loss: 0.05285425 accuracy: 99.350% epoch: 132 batch: 200 [ 20000/60000] loss: 0.03042428 accuracy: 99.140% epoch: 133 batch: 200 [ 20000/60000] loss: 0.11367848 accuracy: 99.285% epoch: 134 batch: 200 [ 20000/60000] loss: 0.00060220 accuracy: 99.385% epoch: 135 batch: 200 [ 20000/60000] loss: 0.00032090 accuracy: 99.765% epoch: 136 batch: 200 [ 20000/60000] loss: 0.00001983 accuracy: 99.800% epoch: 137 batch: 200 [ 20000/60000] loss: 0.00001464 accuracy: 99.955% epoch: 138 batch: 200 [ 20000/60000] loss: 0.10234787 accuracy: 99.925% epoch: 139 batch: 200 [ 20000/60000] loss: 0.00024614 accuracy: 99.900% epoch: 140 batch: 200 [ 20000/60000] loss: 0.00025283 accuracy: 99.800% epoch: 141 batch: 200 [ 20000/60000] loss: 0.00022177 accuracy: 99.335% epoch: 142 batch: 200 [ 20000/60000] loss: 0.08524502 accuracy: 99.055% epoch: 143 batch: 200 [ 20000/60000] loss: 0.09636721 accuracy: 98.720% epoch: 144 batch: 200 [ 20000/60000] loss: 0.10549641 accuracy: 99.180% epoch: 145 batch: 200 [ 20000/60000] loss: 0.00011627 accuracy: 98.930% epoch: 146 batch: 200 [ 20000/60000] loss: 0.00396050 accuracy: 99.165% epoch: 147 batch: 200 [ 20000/60000] loss: 0.03167178 accuracy: 99.405% epoch: 148 batch: 200 [ 20000/60000] loss: 0.00681029 accuracy: 99.720% epoch: 149 batch: 200 [ 20000/60000] loss: 0.00024372 accuracy: 99.915% Duration: 504 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## misclassification
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('Misclassification error at the end of each epoch')
plt.legend();
train_cc = [t/200 for t in train_correct]
test_cc = [t/50 for t in test_correct]
# Extract the data all at once, not in batches
test_load_all = DataLoader(test_q5, batch_size=5000, shuffle=False)
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(X_test)} = {correct.item()*100/(len(X_test)):7.3f}%')
Test accuracy: 4141/5000 = 82.820%
max_index = test_cc.index(max(test_cc))
max_index
149
# 2nd training
import time
start_time = time.time()
torch.manual_seed(77)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train)
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(test_loader):
# Apply the model
y_val = model(X_vali) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 0.00005587 accuracy: 99.855% epoch: 1 batch: 200 [ 20000/60000] loss: 0.00055168 accuracy: 99.890% epoch: 2 batch: 200 [ 20000/60000] loss: 0.00076735 accuracy: 99.860% epoch: 3 batch: 200 [ 20000/60000] loss: 0.01120381 accuracy: 99.785% epoch: 4 batch: 200 [ 20000/60000] loss: 0.04392472 accuracy: 99.565% epoch: 5 batch: 200 [ 20000/60000] loss: 0.00131232 accuracy: 99.455% epoch: 6 batch: 200 [ 20000/60000] loss: 0.00132495 accuracy: 99.480% epoch: 7 batch: 200 [ 20000/60000] loss: 0.15526809 accuracy: 98.770% epoch: 8 batch: 200 [ 20000/60000] loss: 0.06294633 accuracy: 98.935% epoch: 9 batch: 200 [ 20000/60000] loss: 0.00026358 accuracy: 99.205% epoch: 10 batch: 200 [ 20000/60000] loss: 0.11290230 accuracy: 99.710% epoch: 11 batch: 200 [ 20000/60000] loss: 0.00000364 accuracy: 99.830% epoch: 12 batch: 200 [ 20000/60000] loss: 0.00020970 accuracy: 99.810% epoch: 13 batch: 200 [ 20000/60000] loss: 0.00003236 accuracy: 99.835% epoch: 14 batch: 200 [ 20000/60000] loss: 0.09391158 accuracy: 99.405% epoch: 15 batch: 200 [ 20000/60000] loss: 0.00722070 accuracy: 99.280% epoch: 16 batch: 200 [ 20000/60000] loss: 0.13120225 accuracy: 99.555% epoch: 17 batch: 200 [ 20000/60000] loss: 0.00324300 accuracy: 99.100% epoch: 18 batch: 200 [ 20000/60000] loss: 0.00000180 accuracy: 99.350% epoch: 19 batch: 200 [ 20000/60000] loss: 0.12298802 accuracy: 99.525% epoch: 20 batch: 200 [ 20000/60000] loss: 0.00000040 accuracy: 99.540% epoch: 21 batch: 200 [ 20000/60000] loss: 0.00013985 accuracy: 99.675% epoch: 22 batch: 200 [ 20000/60000] loss: 0.00000164 accuracy: 99.295% epoch: 23 batch: 200 [ 20000/60000] loss: 0.23401596 accuracy: 99.210% epoch: 24 batch: 200 [ 20000/60000] loss: 0.00506755 accuracy: 99.500% epoch: 25 batch: 200 [ 20000/60000] loss: 0.00026319 accuracy: 99.600% epoch: 26 batch: 200 [ 20000/60000] loss: 0.00787578 accuracy: 99.515% epoch: 27 batch: 200 [ 20000/60000] loss: 0.00559079 accuracy: 99.580% epoch: 28 batch: 200 [ 20000/60000] loss: 0.00038253 accuracy: 99.560% epoch: 29 batch: 200 [ 20000/60000] loss: 0.00049528 accuracy: 99.655% epoch: 30 batch: 200 [ 20000/60000] loss: 0.02249262 accuracy: 99.565% epoch: 31 batch: 200 [ 20000/60000] loss: 0.00000967 accuracy: 99.580% epoch: 32 batch: 200 [ 20000/60000] loss: 0.01130196 accuracy: 99.580% epoch: 33 batch: 200 [ 20000/60000] loss: 0.00000088 accuracy: 99.695% epoch: 34 batch: 200 [ 20000/60000] loss: 0.00004646 accuracy: 99.845% epoch: 35 batch: 200 [ 20000/60000] loss: 0.09477782 accuracy: 99.520% epoch: 36 batch: 200 [ 20000/60000] loss: 0.09536427 accuracy: 99.295% epoch: 37 batch: 200 [ 20000/60000] loss: 0.03373542 accuracy: 99.575% epoch: 38 batch: 200 [ 20000/60000] loss: 0.00000003 accuracy: 99.670% epoch: 39 batch: 200 [ 20000/60000] loss: 0.00000237 accuracy: 99.735% epoch: 40 batch: 200 [ 20000/60000] loss: 0.00001310 accuracy: 99.690% epoch: 41 batch: 200 [ 20000/60000] loss: 0.00044924 accuracy: 99.810% epoch: 42 batch: 200 [ 20000/60000] loss: 0.00145313 accuracy: 99.850% epoch: 43 batch: 200 [ 20000/60000] loss: 0.00359638 accuracy: 99.360% epoch: 44 batch: 200 [ 20000/60000] loss: 0.07130854 accuracy: 99.085% epoch: 45 batch: 200 [ 20000/60000] loss: 0.00000217 accuracy: 99.195% epoch: 46 batch: 200 [ 20000/60000] loss: 0.00000427 accuracy: 99.360% epoch: 47 batch: 200 [ 20000/60000] loss: 0.24599952 accuracy: 99.325% epoch: 48 batch: 200 [ 20000/60000] loss: 0.00000974 accuracy: 99.515% epoch: 49 batch: 200 [ 20000/60000] loss: 0.00187318 accuracy: 99.785% epoch: 50 batch: 200 [ 20000/60000] loss: 0.00000004 accuracy: 99.745% epoch: 51 batch: 200 [ 20000/60000] loss: 0.24077721 accuracy: 99.665% epoch: 52 batch: 200 [ 20000/60000] loss: 0.00006398 accuracy: 99.610% epoch: 53 batch: 200 [ 20000/60000] loss: 0.03904568 accuracy: 99.455% epoch: 54 batch: 200 [ 20000/60000] loss: 0.32172969 accuracy: 98.995% epoch: 55 batch: 200 [ 20000/60000] loss: 0.00039936 accuracy: 99.290% epoch: 56 batch: 200 [ 20000/60000] loss: 0.66826767 accuracy: 99.500% epoch: 57 batch: 200 [ 20000/60000] loss: 0.79950637 accuracy: 99.325% epoch: 58 batch: 200 [ 20000/60000] loss: 0.15843672 accuracy: 99.545% epoch: 59 batch: 200 [ 20000/60000] loss: 0.00000032 accuracy: 99.795% epoch: 60 batch: 200 [ 20000/60000] loss: 0.02018395 accuracy: 99.920% epoch: 61 batch: 200 [ 20000/60000] loss: 0.00001499 accuracy: 99.970% epoch: 62 batch: 200 [ 20000/60000] loss: 0.00131452 accuracy: 99.815% epoch: 63 batch: 200 [ 20000/60000] loss: 0.00000062 accuracy: 99.610% epoch: 64 batch: 200 [ 20000/60000] loss: 0.00058875 accuracy: 99.550% epoch: 65 batch: 200 [ 20000/60000] loss: 0.07199865 accuracy: 99.450% epoch: 66 batch: 200 [ 20000/60000] loss: 0.07463478 accuracy: 99.240% epoch: 67 batch: 200 [ 20000/60000] loss: 0.11732498 accuracy: 99.510% epoch: 68 batch: 200 [ 20000/60000] loss: 0.11611837 accuracy: 99.430% epoch: 69 batch: 200 [ 20000/60000] loss: 0.00055262 accuracy: 99.510% epoch: 70 batch: 200 [ 20000/60000] loss: 0.09544025 accuracy: 99.660% epoch: 71 batch: 200 [ 20000/60000] loss: 0.00000045 accuracy: 99.735% epoch: 72 batch: 200 [ 20000/60000] loss: 0.00000784 accuracy: 99.795% epoch: 73 batch: 200 [ 20000/60000] loss: 0.01548367 accuracy: 99.605% epoch: 74 batch: 200 [ 20000/60000] loss: 0.00371149 accuracy: 99.075% epoch: 75 batch: 200 [ 20000/60000] loss: 0.02026426 accuracy: 99.245% epoch: 76 batch: 200 [ 20000/60000] loss: 0.33985272 accuracy: 99.385% epoch: 77 batch: 200 [ 20000/60000] loss: 0.00052211 accuracy: 99.655% epoch: 78 batch: 200 [ 20000/60000] loss: 0.12729484 accuracy: 99.775% epoch: 79 batch: 200 [ 20000/60000] loss: 0.00000015 accuracy: 99.765% epoch: 80 batch: 200 [ 20000/60000] loss: 0.00001417 accuracy: 99.855% epoch: 81 batch: 200 [ 20000/60000] loss: 0.03334255 accuracy: 99.755% epoch: 82 batch: 200 [ 20000/60000] loss: 0.00007355 accuracy: 99.735% epoch: 83 batch: 200 [ 20000/60000] loss: 0.00005754 accuracy: 99.800% epoch: 84 batch: 200 [ 20000/60000] loss: 0.00006918 accuracy: 99.505% epoch: 85 batch: 200 [ 20000/60000] loss: 0.00010874 accuracy: 99.235% epoch: 86 batch: 200 [ 20000/60000] loss: 0.29350147 accuracy: 98.720% epoch: 87 batch: 200 [ 20000/60000] loss: 0.01241281 accuracy: 99.355% epoch: 88 batch: 200 [ 20000/60000] loss: 0.00079550 accuracy: 99.570% epoch: 89 batch: 200 [ 20000/60000] loss: 0.00000034 accuracy: 99.845% epoch: 90 batch: 200 [ 20000/60000] loss: 0.00000002 accuracy: 99.910% epoch: 91 batch: 200 [ 20000/60000] loss: 0.00000024 accuracy: 99.970% epoch: 92 batch: 200 [ 20000/60000] loss: 0.00000132 accuracy: 100.000% epoch: 93 batch: 200 [ 20000/60000] loss: 0.00000349 accuracy: 100.000% epoch: 94 batch: 200 [ 20000/60000] loss: 0.00000006 accuracy: 100.000% epoch: 95 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 96 batch: 200 [ 20000/60000] loss: 0.00000001 accuracy: 100.000% epoch: 97 batch: 200 [ 20000/60000] loss: 0.00000168 accuracy: 100.000% epoch: 98 batch: 200 [ 20000/60000] loss: 0.00000036 accuracy: 100.000% epoch: 99 batch: 200 [ 20000/60000] loss: 0.00000009 accuracy: 100.000% epoch: 100 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 101 batch: 200 [ 20000/60000] loss: 0.00000085 accuracy: 100.000% epoch: 102 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 103 batch: 200 [ 20000/60000] loss: 0.00000091 accuracy: 100.000% epoch: 104 batch: 200 [ 20000/60000] loss: 0.00000029 accuracy: 100.000% epoch: 105 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 106 batch: 200 [ 20000/60000] loss: 0.00000124 accuracy: 100.000% epoch: 107 batch: 200 [ 20000/60000] loss: 0.00000049 accuracy: 100.000% epoch: 108 batch: 200 [ 20000/60000] loss: 0.00000361 accuracy: 100.000% epoch: 109 batch: 200 [ 20000/60000] loss: 0.00001135 accuracy: 100.000% epoch: 110 batch: 200 [ 20000/60000] loss: 0.00000210 accuracy: 100.000% epoch: 111 batch: 200 [ 20000/60000] loss: 0.00000018 accuracy: 100.000% epoch: 112 batch: 200 [ 20000/60000] loss: 0.00000014 accuracy: 100.000% epoch: 113 batch: 200 [ 20000/60000] loss: 0.00000033 accuracy: 100.000% epoch: 114 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 115 batch: 200 [ 20000/60000] loss: 0.00000019 accuracy: 100.000% epoch: 116 batch: 200 [ 20000/60000] loss: 0.00000113 accuracy: 100.000% epoch: 117 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 118 batch: 200 [ 20000/60000] loss: 0.00000008 accuracy: 100.000% epoch: 119 batch: 200 [ 20000/60000] loss: 0.00000032 accuracy: 100.000% epoch: 120 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 121 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 122 batch: 200 [ 20000/60000] loss: 0.00000002 accuracy: 100.000% epoch: 123 batch: 200 [ 20000/60000] loss: 0.00000021 accuracy: 100.000% epoch: 124 batch: 200 [ 20000/60000] loss: 0.00000019 accuracy: 100.000% epoch: 125 batch: 200 [ 20000/60000] loss: 0.00000001 accuracy: 100.000% epoch: 126 batch: 200 [ 20000/60000] loss: 0.00000023 accuracy: 100.000% epoch: 127 batch: 200 [ 20000/60000] loss: 0.00000010 accuracy: 100.000% epoch: 128 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 129 batch: 200 [ 20000/60000] loss: 0.00000019 accuracy: 100.000% epoch: 130 batch: 200 [ 20000/60000] loss: 0.00000090 accuracy: 100.000% epoch: 131 batch: 200 [ 20000/60000] loss: 0.00000007 accuracy: 100.000% epoch: 132 batch: 200 [ 20000/60000] loss: 0.00000039 accuracy: 100.000% epoch: 133 batch: 200 [ 20000/60000] loss: 0.00000021 accuracy: 100.000% epoch: 134 batch: 200 [ 20000/60000] loss: 0.00000026 accuracy: 100.000% epoch: 135 batch: 200 [ 20000/60000] loss: 0.00000008 accuracy: 100.000% epoch: 136 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 137 batch: 200 [ 20000/60000] loss: 0.00000003 accuracy: 100.000% epoch: 138 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 139 batch: 200 [ 20000/60000] loss: 0.00000001 accuracy: 100.000% epoch: 140 batch: 200 [ 20000/60000] loss: 0.00000012 accuracy: 100.000% epoch: 141 batch: 200 [ 20000/60000] loss: 0.00000004 accuracy: 100.000% epoch: 142 batch: 200 [ 20000/60000] loss: 0.00000002 accuracy: 100.000% epoch: 143 batch: 200 [ 20000/60000] loss: 0.00000026 accuracy: 100.000% epoch: 144 batch: 200 [ 20000/60000] loss: 0.00000007 accuracy: 100.000% epoch: 145 batch: 200 [ 20000/60000] loss: 0.00000010 accuracy: 100.000% epoch: 146 batch: 200 [ 20000/60000] loss: 0.00000007 accuracy: 100.000% epoch: 147 batch: 200 [ 20000/60000] loss: 0.00000000 accuracy: 100.000% epoch: 148 batch: 200 [ 20000/60000] loss: 0.00000001 accuracy: 100.000% epoch: 149 batch: 200 [ 20000/60000] loss: 0.00000002 accuracy: 100.000% Duration: 907 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## accuracy
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('accuracy error at the end of each epoch')
plt.legend();
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = model(X_test) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(X_test)} = {correct.item()*100/(len(X_test)):7.3f}%')
Test accuracy: 4158/5000 = 83.160%
max_index = test_cc.index(max(test_cc))
max_index
149
weights = []
for wei in model.parameters():
weights.append(wei.detach().cpu().numpy())
for wei in weights:
print(wei.shape)
(784, 1568) (784,) (120, 784) (120,) (84, 120) (84,) (19, 84) (19,)
# Assemble to images.
fc_images = weights[2].reshape(10, 12, 28, 28)
pad_images = np.zeros((10, 12, 30, 30))
pad_images[:, :, 2:30, 2:30] = fc_images
fc_vis = np.transpose(pad_images, (0, 2, 1, 3)).reshape(300, 360)
# Show weights.
figure(figsize=(15, 20), dpi=80)
plt.figure(figsize=(12, 12))
plt.imshow(fc_vis, cmap='gray')
plt.title("Learned W for multiple layers.")
plt.axis("off")
plt.show()
<Figure size 1200x1600 with 0 Axes>
train_loader = DataLoader(train_q5, batch_size=100, shuffle=True)
val_loader = DataLoader(val_q5,batch_size=100, shuffle=True)
test_loader = DataLoader(test_q5, batch_size=500, shuffle=False)
torch.manual_seed(42)
model = MultilayerPerceptron()
model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 1st training
import time
start_time = time.time()
torch.manual_seed(77)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train)
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(test_loader):
# Apply the model
y_val = model(X_vali) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 1.03678799 accuracy: 38.435% epoch: 1 batch: 200 [ 20000/60000] loss: 0.79400945 accuracy: 71.950% epoch: 2 batch: 200 [ 20000/60000] loss: 0.51622415 accuracy: 82.640% epoch: 3 batch: 200 [ 20000/60000] loss: 0.35579911 accuracy: 88.245% epoch: 4 batch: 200 [ 20000/60000] loss: 0.19203365 accuracy: 92.295% epoch: 5 batch: 200 [ 20000/60000] loss: 0.21662717 accuracy: 94.390% epoch: 6 batch: 200 [ 20000/60000] loss: 0.16883652 accuracy: 96.730% epoch: 7 batch: 200 [ 20000/60000] loss: 0.17763437 accuracy: 96.555% epoch: 8 batch: 200 [ 20000/60000] loss: 0.08822335 accuracy: 96.225% epoch: 9 batch: 200 [ 20000/60000] loss: 0.11703558 accuracy: 96.355% epoch: 10 batch: 200 [ 20000/60000] loss: 0.20223208 accuracy: 97.385% epoch: 11 batch: 200 [ 20000/60000] loss: 0.12303656 accuracy: 97.115% epoch: 12 batch: 200 [ 20000/60000] loss: 0.07749555 accuracy: 97.835% epoch: 13 batch: 200 [ 20000/60000] loss: 0.06747665 accuracy: 98.410% epoch: 14 batch: 200 [ 20000/60000] loss: 0.17162126 accuracy: 97.680% epoch: 15 batch: 200 [ 20000/60000] loss: 0.10626784 accuracy: 98.415% epoch: 16 batch: 200 [ 20000/60000] loss: 0.20034511 accuracy: 98.365% epoch: 17 batch: 200 [ 20000/60000] loss: 0.01615432 accuracy: 97.375% epoch: 18 batch: 200 [ 20000/60000] loss: 0.05999601 accuracy: 97.275% epoch: 19 batch: 200 [ 20000/60000] loss: 0.06671812 accuracy: 98.045% epoch: 20 batch: 200 [ 20000/60000] loss: 0.01548157 accuracy: 98.880% epoch: 21 batch: 200 [ 20000/60000] loss: 0.00912306 accuracy: 99.050% epoch: 22 batch: 200 [ 20000/60000] loss: 0.02492549 accuracy: 98.930% epoch: 23 batch: 200 [ 20000/60000] loss: 0.08377516 accuracy: 98.845% epoch: 24 batch: 200 [ 20000/60000] loss: 0.04254721 accuracy: 98.130% epoch: 25 batch: 200 [ 20000/60000] loss: 0.03384440 accuracy: 98.400% epoch: 26 batch: 200 [ 20000/60000] loss: 0.01546001 accuracy: 98.155% epoch: 27 batch: 200 [ 20000/60000] loss: 0.02044662 accuracy: 97.965% epoch: 28 batch: 200 [ 20000/60000] loss: 0.21884096 accuracy: 98.215% epoch: 29 batch: 200 [ 20000/60000] loss: 0.13506387 accuracy: 98.480% epoch: 30 batch: 200 [ 20000/60000] loss: 0.09833180 accuracy: 98.665% epoch: 31 batch: 200 [ 20000/60000] loss: 0.01108597 accuracy: 99.175% epoch: 32 batch: 200 [ 20000/60000] loss: 0.00527088 accuracy: 99.340% epoch: 33 batch: 200 [ 20000/60000] loss: 0.01058745 accuracy: 99.115% epoch: 34 batch: 200 [ 20000/60000] loss: 0.05767481 accuracy: 98.485% epoch: 35 batch: 200 [ 20000/60000] loss: 0.05931539 accuracy: 98.615% epoch: 36 batch: 200 [ 20000/60000] loss: 0.07174852 accuracy: 98.645% epoch: 37 batch: 200 [ 20000/60000] loss: 0.20607165 accuracy: 98.215% epoch: 38 batch: 200 [ 20000/60000] loss: 0.29870179 accuracy: 98.790% epoch: 39 batch: 200 [ 20000/60000] loss: 0.04769071 accuracy: 98.575% epoch: 40 batch: 200 [ 20000/60000] loss: 0.07613291 accuracy: 98.710% epoch: 41 batch: 200 [ 20000/60000] loss: 0.01363966 accuracy: 98.960% epoch: 42 batch: 200 [ 20000/60000] loss: 0.01157140 accuracy: 99.015% epoch: 43 batch: 200 [ 20000/60000] loss: 0.03166600 accuracy: 98.880% epoch: 44 batch: 200 [ 20000/60000] loss: 0.03456838 accuracy: 99.245% epoch: 45 batch: 200 [ 20000/60000] loss: 0.05215950 accuracy: 99.395% epoch: 46 batch: 200 [ 20000/60000] loss: 0.00370978 accuracy: 99.530% epoch: 47 batch: 200 [ 20000/60000] loss: 0.00479910 accuracy: 99.360% epoch: 48 batch: 200 [ 20000/60000] loss: 0.05425328 accuracy: 98.695% epoch: 49 batch: 200 [ 20000/60000] loss: 0.00588522 accuracy: 98.745% epoch: 50 batch: 200 [ 20000/60000] loss: 0.00617720 accuracy: 98.740% epoch: 51 batch: 200 [ 20000/60000] loss: 0.15336552 accuracy: 98.645% epoch: 52 batch: 200 [ 20000/60000] loss: 0.00143905 accuracy: 99.140% epoch: 53 batch: 200 [ 20000/60000] loss: 0.00689131 accuracy: 99.405% epoch: 54 batch: 200 [ 20000/60000] loss: 0.00171514 accuracy: 99.555% epoch: 55 batch: 200 [ 20000/60000] loss: 0.05973903 accuracy: 99.405% epoch: 56 batch: 200 [ 20000/60000] loss: 0.00286970 accuracy: 99.150% epoch: 57 batch: 200 [ 20000/60000] loss: 0.00617298 accuracy: 99.010% epoch: 58 batch: 200 [ 20000/60000] loss: 0.00134538 accuracy: 98.875% epoch: 59 batch: 200 [ 20000/60000] loss: 0.07708840 accuracy: 98.735% epoch: 60 batch: 200 [ 20000/60000] loss: 0.04676012 accuracy: 99.000% epoch: 61 batch: 200 [ 20000/60000] loss: 0.01159693 accuracy: 99.125% epoch: 62 batch: 200 [ 20000/60000] loss: 0.04223442 accuracy: 98.935% epoch: 63 batch: 200 [ 20000/60000] loss: 0.03132470 accuracy: 99.280% epoch: 64 batch: 200 [ 20000/60000] loss: 0.08786089 accuracy: 98.830% epoch: 65 batch: 200 [ 20000/60000] loss: 0.05928154 accuracy: 98.780% epoch: 66 batch: 200 [ 20000/60000] loss: 0.03628067 accuracy: 98.335% epoch: 67 batch: 200 [ 20000/60000] loss: 0.10849533 accuracy: 99.175% epoch: 68 batch: 200 [ 20000/60000] loss: 0.02676846 accuracy: 99.460% epoch: 69 batch: 200 [ 20000/60000] loss: 0.00206058 accuracy: 99.615% epoch: 70 batch: 200 [ 20000/60000] loss: 0.00338457 accuracy: 99.470% epoch: 71 batch: 200 [ 20000/60000] loss: 0.08010088 accuracy: 99.445% epoch: 72 batch: 200 [ 20000/60000] loss: 0.00342260 accuracy: 99.405% epoch: 73 batch: 200 [ 20000/60000] loss: 0.09239257 accuracy: 99.105% epoch: 74 batch: 200 [ 20000/60000] loss: 0.06465814 accuracy: 98.830% epoch: 75 batch: 200 [ 20000/60000] loss: 0.15868980 accuracy: 99.090% epoch: 76 batch: 200 [ 20000/60000] loss: 0.00678825 accuracy: 99.330% epoch: 77 batch: 200 [ 20000/60000] loss: 0.09502163 accuracy: 99.590% epoch: 78 batch: 200 [ 20000/60000] loss: 0.00229618 accuracy: 99.645% epoch: 79 batch: 200 [ 20000/60000] loss: 0.02848122 accuracy: 99.525% epoch: 80 batch: 200 [ 20000/60000] loss: 0.32753444 accuracy: 98.435% epoch: 81 batch: 200 [ 20000/60000] loss: 0.00137422 accuracy: 98.635% epoch: 82 batch: 200 [ 20000/60000] loss: 0.00873240 accuracy: 99.190% epoch: 83 batch: 200 [ 20000/60000] loss: 0.00023539 accuracy: 99.740% epoch: 84 batch: 200 [ 20000/60000] loss: 0.04535615 accuracy: 99.625% epoch: 85 batch: 200 [ 20000/60000] loss: 0.04412027 accuracy: 99.235% epoch: 86 batch: 200 [ 20000/60000] loss: 0.02685205 accuracy: 98.810% epoch: 87 batch: 200 [ 20000/60000] loss: 0.01217585 accuracy: 99.235% epoch: 88 batch: 200 [ 20000/60000] loss: 0.00011469 accuracy: 99.355% epoch: 89 batch: 200 [ 20000/60000] loss: 0.00152373 accuracy: 99.315% epoch: 90 batch: 200 [ 20000/60000] loss: 0.01227549 accuracy: 99.500% epoch: 91 batch: 200 [ 20000/60000] loss: 0.00205862 accuracy: 99.695% epoch: 92 batch: 200 [ 20000/60000] loss: 0.02852018 accuracy: 99.455% epoch: 93 batch: 200 [ 20000/60000] loss: 0.04832601 accuracy: 99.250% epoch: 94 batch: 200 [ 20000/60000] loss: 0.10405596 accuracy: 99.165% epoch: 95 batch: 200 [ 20000/60000] loss: 0.00220502 accuracy: 99.200% epoch: 96 batch: 200 [ 20000/60000] loss: 0.04508021 accuracy: 99.110% epoch: 97 batch: 200 [ 20000/60000] loss: 0.00269042 accuracy: 98.655% epoch: 98 batch: 200 [ 20000/60000] loss: 0.00216384 accuracy: 99.265% epoch: 99 batch: 200 [ 20000/60000] loss: 0.06154246 accuracy: 99.640% epoch: 100 batch: 200 [ 20000/60000] loss: 0.00178605 accuracy: 99.610% epoch: 101 batch: 200 [ 20000/60000] loss: 0.00021390 accuracy: 99.635% epoch: 102 batch: 200 [ 20000/60000] loss: 0.00025371 accuracy: 99.755% epoch: 103 batch: 200 [ 20000/60000] loss: 0.02415140 accuracy: 99.785% epoch: 104 batch: 200 [ 20000/60000] loss: 0.00146817 accuracy: 99.640% epoch: 105 batch: 200 [ 20000/60000] loss: 0.00009111 accuracy: 99.625% epoch: 106 batch: 200 [ 20000/60000] loss: 0.07846620 accuracy: 99.335% epoch: 107 batch: 200 [ 20000/60000] loss: 0.16802885 accuracy: 98.905% epoch: 108 batch: 200 [ 20000/60000] loss: 0.10243546 accuracy: 98.600% epoch: 109 batch: 200 [ 20000/60000] loss: 0.01836356 accuracy: 99.080% epoch: 110 batch: 200 [ 20000/60000] loss: 0.00812645 accuracy: 98.975% epoch: 111 batch: 200 [ 20000/60000] loss: 0.02069528 accuracy: 99.370% epoch: 112 batch: 200 [ 20000/60000] loss: 0.00069322 accuracy: 99.550% epoch: 113 batch: 200 [ 20000/60000] loss: 0.00923531 accuracy: 99.330% epoch: 114 batch: 200 [ 20000/60000] loss: 0.01679049 accuracy: 99.400% epoch: 115 batch: 200 [ 20000/60000] loss: 0.00004530 accuracy: 99.610% epoch: 116 batch: 200 [ 20000/60000] loss: 0.00006736 accuracy: 99.620% epoch: 117 batch: 200 [ 20000/60000] loss: 0.04991628 accuracy: 99.465% epoch: 118 batch: 200 [ 20000/60000] loss: 0.11569741 accuracy: 99.580% epoch: 119 batch: 200 [ 20000/60000] loss: 0.03128894 accuracy: 99.675% epoch: 120 batch: 200 [ 20000/60000] loss: 0.06901843 accuracy: 99.315% epoch: 121 batch: 200 [ 20000/60000] loss: 0.02064684 accuracy: 99.020% epoch: 122 batch: 200 [ 20000/60000] loss: 0.00048014 accuracy: 98.965% epoch: 123 batch: 200 [ 20000/60000] loss: 0.00116263 accuracy: 99.255% epoch: 124 batch: 200 [ 20000/60000] loss: 0.00035129 accuracy: 99.225% epoch: 125 batch: 200 [ 20000/60000] loss: 0.01537568 accuracy: 99.140% epoch: 126 batch: 200 [ 20000/60000] loss: 0.00037159 accuracy: 99.435% epoch: 127 batch: 200 [ 20000/60000] loss: 0.00057967 accuracy: 99.710% epoch: 128 batch: 200 [ 20000/60000] loss: 0.00083547 accuracy: 99.735% epoch: 129 batch: 200 [ 20000/60000] loss: 0.01184410 accuracy: 99.460% epoch: 130 batch: 200 [ 20000/60000] loss: 0.00450716 accuracy: 99.190% epoch: 131 batch: 200 [ 20000/60000] loss: 0.05193027 accuracy: 99.420% epoch: 132 batch: 200 [ 20000/60000] loss: 0.01142415 accuracy: 99.385% epoch: 133 batch: 200 [ 20000/60000] loss: 0.00068017 accuracy: 99.505% epoch: 134 batch: 200 [ 20000/60000] loss: 0.01001617 accuracy: 99.615% epoch: 135 batch: 200 [ 20000/60000] loss: 0.00016408 accuracy: 99.675% epoch: 136 batch: 200 [ 20000/60000] loss: 0.02158948 accuracy: 99.510% epoch: 137 batch: 200 [ 20000/60000] loss: 0.02096409 accuracy: 99.605% epoch: 138 batch: 200 [ 20000/60000] loss: 0.05895118 accuracy: 99.540% epoch: 139 batch: 200 [ 20000/60000] loss: 0.08298568 accuracy: 99.400% epoch: 140 batch: 200 [ 20000/60000] loss: 0.00043201 accuracy: 99.465% epoch: 141 batch: 200 [ 20000/60000] loss: 0.00029940 accuracy: 99.550% epoch: 142 batch: 200 [ 20000/60000] loss: 0.00008872 accuracy: 99.530% epoch: 143 batch: 200 [ 20000/60000] loss: 0.00145735 accuracy: 99.480% epoch: 144 batch: 200 [ 20000/60000] loss: 0.00147207 accuracy: 99.450% epoch: 145 batch: 200 [ 20000/60000] loss: 0.00436685 accuracy: 99.510% epoch: 146 batch: 200 [ 20000/60000] loss: 0.02752648 accuracy: 99.415% epoch: 147 batch: 200 [ 20000/60000] loss: 0.00003798 accuracy: 99.390% epoch: 148 batch: 200 [ 20000/60000] loss: 0.15041187 accuracy: 99.380% epoch: 149 batch: 200 [ 20000/60000] loss: 0.01217123 accuracy: 99.500% Duration: 571 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## accuracy
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('accuracy error at the end of each epoch')
plt.legend();
test_cc = [t/50 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 144 best test accuracy is: 81.36000061035156
train_loader = DataLoader(train_q5, batch_size=100, shuffle=True)
val_loader = DataLoader(val_q5,batch_size=100, shuffle=True)
test_loader = DataLoader(test_q5, batch_size=500, shuffle=False)
torch.manual_seed(42)
model = MultilayerPerceptron()
model
MultilayerPerceptron( (fc1): Linear(in_features=1568, out_features=784, bias=True) (fc2): Linear(in_features=784, out_features=120, bias=True) (fc3): Linear(in_features=120, out_features=84, bias=True) (fc4): Linear(in_features=84, out_features=19, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.5)
# 1st training
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 150
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = model(X_train)
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%200 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(100*b):7.3f}%')
# Update train loss & accuracy for the epoch
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(test_loader):
# Apply the model
y_val = model(X_vali) # Here we flatten X_test
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
# Update test loss & accuracy for the epoch
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 200 [ 20000/60000] loss: 2.83202457 accuracy: 8.550% epoch: 1 batch: 200 [ 20000/60000] loss: 2.83711338 accuracy: 9.405% epoch: 2 batch: 200 [ 20000/60000] loss: 2.82349730 accuracy: 8.660% epoch: 3 batch: 200 [ 20000/60000] loss: 2.83457184 accuracy: 8.765% epoch: 4 batch: 200 [ 20000/60000] loss: 2.82189202 accuracy: 9.320% epoch: 5 batch: 200 [ 20000/60000] loss: 2.90477633 accuracy: 9.030% epoch: 6 batch: 200 [ 20000/60000] loss: 2.83563852 accuracy: 8.905% epoch: 7 batch: 200 [ 20000/60000] loss: 2.88640904 accuracy: 8.840% epoch: 8 batch: 200 [ 20000/60000] loss: 2.89946198 accuracy: 8.715% epoch: 9 batch: 200 [ 20000/60000] loss: 2.85119629 accuracy: 9.115% epoch: 10 batch: 200 [ 20000/60000] loss: 2.81457829 accuracy: 8.930% epoch: 11 batch: 200 [ 20000/60000] loss: 2.83775473 accuracy: 8.975% epoch: 12 batch: 200 [ 20000/60000] loss: 2.69786000 accuracy: 9.150% epoch: 13 batch: 200 [ 20000/60000] loss: 2.84287882 accuracy: 8.780% epoch: 14 batch: 200 [ 20000/60000] loss: 2.75385690 accuracy: 9.150% epoch: 15 batch: 200 [ 20000/60000] loss: 2.82961750 accuracy: 8.820% epoch: 16 batch: 200 [ 20000/60000] loss: 2.77625918 accuracy: 9.135% epoch: 17 batch: 200 [ 20000/60000] loss: 2.78135037 accuracy: 9.245% epoch: 18 batch: 200 [ 20000/60000] loss: 2.89862180 accuracy: 9.040% epoch: 19 batch: 200 [ 20000/60000] loss: 2.80007720 accuracy: 9.015% epoch: 20 batch: 200 [ 20000/60000] loss: 2.87532020 accuracy: 9.090% epoch: 21 batch: 200 [ 20000/60000] loss: 2.84110165 accuracy: 8.905% epoch: 22 batch: 200 [ 20000/60000] loss: 2.84115744 accuracy: 8.795% epoch: 23 batch: 200 [ 20000/60000] loss: 2.92020011 accuracy: 8.980% epoch: 24 batch: 200 [ 20000/60000] loss: 2.80254483 accuracy: 9.055% epoch: 25 batch: 200 [ 20000/60000] loss: 2.86027002 accuracy: 8.955% epoch: 26 batch: 200 [ 20000/60000] loss: 2.83899760 accuracy: 8.835% epoch: 27 batch: 200 [ 20000/60000] loss: 3.02493048 accuracy: 8.905% epoch: 28 batch: 200 [ 20000/60000] loss: 2.70616913 accuracy: 9.440% epoch: 29 batch: 200 [ 20000/60000] loss: 2.77409554 accuracy: 8.785% epoch: 30 batch: 200 [ 20000/60000] loss: 2.76099396 accuracy: 8.730% epoch: 31 batch: 200 [ 20000/60000] loss: 2.91962647 accuracy: 9.355% epoch: 32 batch: 200 [ 20000/60000] loss: 2.94596386 accuracy: 9.225% epoch: 33 batch: 200 [ 20000/60000] loss: 2.90094137 accuracy: 8.745% epoch: 34 batch: 200 [ 20000/60000] loss: 2.77558446 accuracy: 8.860% epoch: 35 batch: 200 [ 20000/60000] loss: 2.83617640 accuracy: 8.845% epoch: 36 batch: 200 [ 20000/60000] loss: 2.83853436 accuracy: 9.100% epoch: 37 batch: 200 [ 20000/60000] loss: 2.80383706 accuracy: 9.045% epoch: 38 batch: 200 [ 20000/60000] loss: 2.76549721 accuracy: 9.070% epoch: 39 batch: 200 [ 20000/60000] loss: 2.84896302 accuracy: 9.135% epoch: 40 batch: 200 [ 20000/60000] loss: 2.76921391 accuracy: 8.570% epoch: 41 batch: 200 [ 20000/60000] loss: 2.86489582 accuracy: 9.300% epoch: 42 batch: 200 [ 20000/60000] loss: 2.88047814 accuracy: 9.065% epoch: 43 batch: 200 [ 20000/60000] loss: 2.88814259 accuracy: 9.095% epoch: 44 batch: 200 [ 20000/60000] loss: 2.87787080 accuracy: 9.240% epoch: 45 batch: 200 [ 20000/60000] loss: 2.82768941 accuracy: 8.590% epoch: 46 batch: 200 [ 20000/60000] loss: 2.86258769 accuracy: 9.140% epoch: 47 batch: 200 [ 20000/60000] loss: 2.91280007 accuracy: 8.965% epoch: 48 batch: 200 [ 20000/60000] loss: 2.73156190 accuracy: 8.600% epoch: 49 batch: 200 [ 20000/60000] loss: 2.79892397 accuracy: 9.155% epoch: 50 batch: 200 [ 20000/60000] loss: 2.87909007 accuracy: 9.220% epoch: 51 batch: 200 [ 20000/60000] loss: 2.94285464 accuracy: 8.825% epoch: 52 batch: 200 [ 20000/60000] loss: 2.79827547 accuracy: 8.995% epoch: 53 batch: 200 [ 20000/60000] loss: 2.86481762 accuracy: 9.085% epoch: 54 batch: 200 [ 20000/60000] loss: 2.68634820 accuracy: 9.120% epoch: 55 batch: 200 [ 20000/60000] loss: 2.79314828 accuracy: 8.595% epoch: 56 batch: 200 [ 20000/60000] loss: 2.90098262 accuracy: 8.985% epoch: 57 batch: 200 [ 20000/60000] loss: 2.87474728 accuracy: 8.810% epoch: 58 batch: 200 [ 20000/60000] loss: 2.75054574 accuracy: 9.500% epoch: 59 batch: 200 [ 20000/60000] loss: 2.86641812 accuracy: 9.250% epoch: 60 batch: 200 [ 20000/60000] loss: 2.82593751 accuracy: 8.875% epoch: 61 batch: 200 [ 20000/60000] loss: 2.87299919 accuracy: 9.235% epoch: 62 batch: 200 [ 20000/60000] loss: 2.94131494 accuracy: 9.230% epoch: 63 batch: 200 [ 20000/60000] loss: 2.85255837 accuracy: 9.015% epoch: 64 batch: 200 [ 20000/60000] loss: 2.78466916 accuracy: 8.950% epoch: 65 batch: 200 [ 20000/60000] loss: 2.80333185 accuracy: 8.975% epoch: 66 batch: 200 [ 20000/60000] loss: 2.89575481 accuracy: 9.455% epoch: 67 batch: 200 [ 20000/60000] loss: 2.89977050 accuracy: 9.015% epoch: 68 batch: 200 [ 20000/60000] loss: 2.95599771 accuracy: 8.915% epoch: 69 batch: 200 [ 20000/60000] loss: 2.87455988 accuracy: 8.610% epoch: 70 batch: 200 [ 20000/60000] loss: 2.88084340 accuracy: 9.210% epoch: 71 batch: 200 [ 20000/60000] loss: 2.86667848 accuracy: 9.095% epoch: 72 batch: 200 [ 20000/60000] loss: 2.77268505 accuracy: 8.870% epoch: 73 batch: 200 [ 20000/60000] loss: 2.77649283 accuracy: 9.015% epoch: 74 batch: 200 [ 20000/60000] loss: 2.90196371 accuracy: 8.955% epoch: 75 batch: 200 [ 20000/60000] loss: 2.88448644 accuracy: 9.100% epoch: 76 batch: 200 [ 20000/60000] loss: 2.76664639 accuracy: 9.315% epoch: 77 batch: 200 [ 20000/60000] loss: 2.80561209 accuracy: 9.255% epoch: 78 batch: 200 [ 20000/60000] loss: 2.91550016 accuracy: 8.720% epoch: 79 batch: 200 [ 20000/60000] loss: 2.80132747 accuracy: 8.615% epoch: 80 batch: 200 [ 20000/60000] loss: 2.88472891 accuracy: 8.795% epoch: 81 batch: 200 [ 20000/60000] loss: 2.76366520 accuracy: 8.835% epoch: 82 batch: 200 [ 20000/60000] loss: 2.94135809 accuracy: 8.780% epoch: 83 batch: 200 [ 20000/60000] loss: 2.90346432 accuracy: 8.900% epoch: 84 batch: 200 [ 20000/60000] loss: 2.82975650 accuracy: 9.095% epoch: 85 batch: 200 [ 20000/60000] loss: 2.82665944 accuracy: 9.085% epoch: 86 batch: 200 [ 20000/60000] loss: 2.75031686 accuracy: 8.745% epoch: 87 batch: 200 [ 20000/60000] loss: 2.84207940 accuracy: 8.770% epoch: 88 batch: 200 [ 20000/60000] loss: 2.85639334 accuracy: 8.730% epoch: 89 batch: 200 [ 20000/60000] loss: 2.81907988 accuracy: 9.520% epoch: 90 batch: 200 [ 20000/60000] loss: 2.92242455 accuracy: 9.000% epoch: 91 batch: 200 [ 20000/60000] loss: 2.83206844 accuracy: 8.555% epoch: 92 batch: 200 [ 20000/60000] loss: 2.79352164 accuracy: 9.025% epoch: 93 batch: 200 [ 20000/60000] loss: 2.83133221 accuracy: 9.200% epoch: 94 batch: 200 [ 20000/60000] loss: 2.94598937 accuracy: 8.825% epoch: 95 batch: 200 [ 20000/60000] loss: 2.81433392 accuracy: 8.595% epoch: 96 batch: 200 [ 20000/60000] loss: 2.84377527 accuracy: 8.870% epoch: 97 batch: 200 [ 20000/60000] loss: 2.75865531 accuracy: 9.115% epoch: 98 batch: 200 [ 20000/60000] loss: 2.85397649 accuracy: 9.285% epoch: 99 batch: 200 [ 20000/60000] loss: 2.77723622 accuracy: 9.085% epoch: 100 batch: 200 [ 20000/60000] loss: 2.89199400 accuracy: 8.335% epoch: 101 batch: 200 [ 20000/60000] loss: 2.75769615 accuracy: 8.770% epoch: 102 batch: 200 [ 20000/60000] loss: 2.72615266 accuracy: 8.910% epoch: 103 batch: 200 [ 20000/60000] loss: 2.81274772 accuracy: 9.240% epoch: 104 batch: 200 [ 20000/60000] loss: 2.79876041 accuracy: 8.770% epoch: 105 batch: 200 [ 20000/60000] loss: 2.77392864 accuracy: 9.195% epoch: 106 batch: 200 [ 20000/60000] loss: 2.85413647 accuracy: 8.690% epoch: 107 batch: 200 [ 20000/60000] loss: 2.80103374 accuracy: 8.905% epoch: 108 batch: 200 [ 20000/60000] loss: 2.84421325 accuracy: 9.130% epoch: 109 batch: 200 [ 20000/60000] loss: 2.81132793 accuracy: 8.965% epoch: 110 batch: 200 [ 20000/60000] loss: 2.71434903 accuracy: 9.220% epoch: 111 batch: 200 [ 20000/60000] loss: 2.85178471 accuracy: 9.190% epoch: 112 batch: 200 [ 20000/60000] loss: 2.97340107 accuracy: 8.860% epoch: 113 batch: 200 [ 20000/60000] loss: 2.72473598 accuracy: 8.950% epoch: 114 batch: 200 [ 20000/60000] loss: 2.87149358 accuracy: 9.190% epoch: 115 batch: 200 [ 20000/60000] loss: 2.78650761 accuracy: 9.395% epoch: 116 batch: 200 [ 20000/60000] loss: 2.75146770 accuracy: 9.060% epoch: 117 batch: 200 [ 20000/60000] loss: 2.93625998 accuracy: 8.875% epoch: 118 batch: 200 [ 20000/60000] loss: 2.93880820 accuracy: 8.870% epoch: 119 batch: 200 [ 20000/60000] loss: 2.78282142 accuracy: 8.955% epoch: 120 batch: 200 [ 20000/60000] loss: 2.75555754 accuracy: 8.925% epoch: 121 batch: 200 [ 20000/60000] loss: 2.81560206 accuracy: 8.910% epoch: 122 batch: 200 [ 20000/60000] loss: 2.78668213 accuracy: 8.790% epoch: 123 batch: 200 [ 20000/60000] loss: 2.89597225 accuracy: 8.790% epoch: 124 batch: 200 [ 20000/60000] loss: 2.79240489 accuracy: 9.075% epoch: 125 batch: 200 [ 20000/60000] loss: 2.89482021 accuracy: 9.070% epoch: 126 batch: 200 [ 20000/60000] loss: 2.88502288 accuracy: 9.005% epoch: 127 batch: 200 [ 20000/60000] loss: 2.78323340 accuracy: 8.380% epoch: 128 batch: 200 [ 20000/60000] loss: 2.88720846 accuracy: 9.320% epoch: 129 batch: 200 [ 20000/60000] loss: 2.77211833 accuracy: 8.905% epoch: 130 batch: 200 [ 20000/60000] loss: 2.83036160 accuracy: 8.840% epoch: 131 batch: 200 [ 20000/60000] loss: 2.70192146 accuracy: 8.960% epoch: 132 batch: 200 [ 20000/60000] loss: 2.74491429 accuracy: 8.805% epoch: 133 batch: 200 [ 20000/60000] loss: 2.86804104 accuracy: 9.025% epoch: 134 batch: 200 [ 20000/60000] loss: 2.95395970 accuracy: 8.995% epoch: 135 batch: 200 [ 20000/60000] loss: 2.91409230 accuracy: 8.895% epoch: 136 batch: 200 [ 20000/60000] loss: 2.89550424 accuracy: 8.755% epoch: 137 batch: 200 [ 20000/60000] loss: 2.75503516 accuracy: 8.935% epoch: 138 batch: 200 [ 20000/60000] loss: 2.83743095 accuracy: 9.020% epoch: 139 batch: 200 [ 20000/60000] loss: 2.81507206 accuracy: 9.045% epoch: 140 batch: 200 [ 20000/60000] loss: 2.88142061 accuracy: 9.060% epoch: 141 batch: 200 [ 20000/60000] loss: 2.97725344 accuracy: 9.490% epoch: 142 batch: 200 [ 20000/60000] loss: 2.84749150 accuracy: 9.200% epoch: 143 batch: 200 [ 20000/60000] loss: 2.86408663 accuracy: 8.905% epoch: 144 batch: 200 [ 20000/60000] loss: 2.78501534 accuracy: 8.690% epoch: 145 batch: 200 [ 20000/60000] loss: 2.77095819 accuracy: 9.120% epoch: 146 batch: 200 [ 20000/60000] loss: 2.87471223 accuracy: 9.465% epoch: 147 batch: 200 [ 20000/60000] loss: 2.88495445 accuracy: 9.540% epoch: 148 batch: 200 [ 20000/60000] loss: 2.78322816 accuracy: 9.205% epoch: 149 batch: 200 [ 20000/60000] loss: 2.76855397 accuracy: 9.260% Duration: 596 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## accuracy
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('accuracy error at the end of each epoch')
plt.legend();
test_cc = [t/50 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 4 best test accuracy is: 10.699999809265137
class CustomerDataset(Dataset):
def __init__(self, file_path):
file_out = pd.read_table(file_path,sep=',',names=range(1569))
X = np.array(file_out.iloc[:,:1568].values).reshape(file_out.shape[0],28,56)
y = file_out.iloc[:,1568].values
# convert to tensor
self.X = torch.tensor(X,dtype=torch.float32)
self.y = torch.tensor(y)
def __len__(self):
return len(self.y)
def __getitem__(self,idx):
return self.X[idx],self.y[idx]
train_q5 = CustomerDataset(train_path)
val_q5 = CustomerDataset(val_path)
test_q5 = CustomerDataset(test_path)
train_loader = DataLoader(train_q5, batch_size=10, shuffle=True)
val_loader = DataLoader(val_q5,batch_size=10, shuffle=True)
test_loader = DataLoader(test_q5, batch_size=10, shuffle=False)
class ConvolutionalNetwork(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 3, 1)
self.conv2 = nn.Conv2d(6, 16, 3, 1)
self.fc1 = nn.Linear(5*12*16, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84,19)
def forward(self, X):
X = F.relu(self.conv1(X))
X = F.max_pool2d(X, 2, 2)
X = F.relu(self.conv2(X))
X = F.max_pool2d(X, 2, 2)
X = X.view(-1, 5*12*16)
X = F.relu(self.fc1(X))
X = F.relu(self.fc2(X))
X = self.fc3(X)
return F.log_softmax(X, dim=1)
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1)) (fc1): Linear(in_features=960, out_features=120, bias=True) (fc2): Linear(in_features=120, out_features=84, bias=True) (fc3): Linear(in_features=84, out_features=19, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001)
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 70
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train.view(10,1,28,56))
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%10==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(val_loader):
# Apply the model
y_val = cnn_model(X_vali.view(10,1,28,56))
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 2.45193720 accuracy: 12.567% epoch: 0 batch: 1200 [ 12000/60000] loss: 1.95093596 accuracy: 19.508% epoch: 0 batch: 1800 [ 18000/60000] loss: 1.45480847 accuracy: 26.933% epoch: 10 batch: 600 [ 6000/60000] loss: 0.06125475 accuracy: 97.600% epoch: 10 batch: 1200 [ 12000/60000] loss: 0.06559862 accuracy: 97.017% epoch: 10 batch: 1800 [ 18000/60000] loss: 0.22830176 accuracy: 96.711% epoch: 20 batch: 600 [ 6000/60000] loss: 0.00678120 accuracy: 99.167% epoch: 20 batch: 1200 [ 12000/60000] loss: 0.00021980 accuracy: 98.742% epoch: 20 batch: 1800 [ 18000/60000] loss: 0.01231803 accuracy: 98.567% epoch: 30 batch: 600 [ 6000/60000] loss: 0.00005814 accuracy: 99.383% epoch: 30 batch: 1200 [ 12000/60000] loss: 0.00109983 accuracy: 99.283% epoch: 30 batch: 1800 [ 18000/60000] loss: 0.00002539 accuracy: 99.194% epoch: 40 batch: 600 [ 6000/60000] loss: 0.00716525 accuracy: 99.583% epoch: 40 batch: 1200 [ 12000/60000] loss: 0.00060169 accuracy: 99.408% epoch: 40 batch: 1800 [ 18000/60000] loss: 0.01144945 accuracy: 99.367% epoch: 50 batch: 600 [ 6000/60000] loss: 0.00000318 accuracy: 99.283% epoch: 50 batch: 1200 [ 12000/60000] loss: 0.00003628 accuracy: 99.433% epoch: 50 batch: 1800 [ 18000/60000] loss: 0.02036251 accuracy: 99.411% epoch: 60 batch: 600 [ 6000/60000] loss: 0.00008557 accuracy: 99.783% epoch: 60 batch: 1200 [ 12000/60000] loss: 0.00000175 accuracy: 99.683% epoch: 60 batch: 1800 [ 18000/60000] loss: 0.00001374 accuracy: 99.617% Duration: 763 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## accuracy
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('accuracy error at the end of each epoch')
plt.legend();
# Extract the data all at once, not in batches
test_load_all = DataLoader(test_q5, batch_size=5000, shuffle=False)
with torch.no_grad():
correct = 0
for X_test, y_test in test_load_all:
y_val = cnn_model(X_test.view(5000,1,28,56)) # we don't flatten the data this time
predicted = torch.max(y_val,1)[1]
correct += (predicted == y_test).sum()
print(f'Test accuracy: {correct.item()}/{len(X_test)} = {correct.item()*100/(len(X_test)):7.3f}%')
Test accuracy: 4698/5000 = 93.960%
train_cc = [t/200 for t in train_correct]
test_cc = [t/50 for t in test_correct]
max_index = test_cc.index(max(test_cc))
max_index
46
weights = []
for wei in cnn_model.parameters():
weights.append(wei.detach().cpu().numpy())
for wei in weights:
print(wei.shape)
# Assemble to images.
fc_images = weights[4].reshape(10, 12, 20, 20)
pad_images = np.zeros((10, 12, 30, 30))
pad_images[:, :, 10:30, 10:30] = fc_images
fc_vis = np.transpose(pad_images, (0, 2, 1, 3)).reshape(300, 360)
# Show weights.
figure(figsize=(15, 20), dpi=80)
plt.figure(figsize=(12, 12))
plt.imshow(fc_vis, cmap='gray')
plt.title("Learned W for multiple layers.")
plt.axis("off")
plt.show()
train_loader = DataLoader(train_q5, batch_size=10, shuffle=True)
val_loader = DataLoader(val_q5,batch_size=10, shuffle=True)
test_loader = DataLoader(test_q5, batch_size=10, shuffle=False)
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1)) (fc1): Linear(in_features=960, out_features=120, bias=True) (fc2): Linear(in_features=120, out_features=84, bias=True) (fc3): Linear(in_features=84, out_features=19, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.2)
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 70
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train.view(10,1,28,56))
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%10==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(val_loader):
# Apply the model
y_val = cnn_model(X_vali.view(10,1,28,56))
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 2.83128095 accuracy: 9.050% epoch: 0 batch: 1200 [ 12000/60000] loss: 2.85656953 accuracy: 8.975% epoch: 0 batch: 1800 [ 18000/60000] loss: 2.95758176 accuracy: 8.900% epoch: 10 batch: 600 [ 6000/60000] loss: 2.58340883 accuracy: 9.250% epoch: 10 batch: 1200 [ 12000/60000] loss: 2.96311903 accuracy: 8.850% epoch: 10 batch: 1800 [ 18000/60000] loss: 2.81125093 accuracy: 8.778% epoch: 20 batch: 600 [ 6000/60000] loss: 2.96598911 accuracy: 8.450% epoch: 20 batch: 1200 [ 12000/60000] loss: 2.77917218 accuracy: 8.625% epoch: 20 batch: 1800 [ 18000/60000] loss: 2.61734557 accuracy: 8.800% epoch: 30 batch: 600 [ 6000/60000] loss: 3.24664259 accuracy: 8.883% epoch: 30 batch: 1200 [ 12000/60000] loss: 3.18627691 accuracy: 9.050% epoch: 30 batch: 1800 [ 18000/60000] loss: 2.80974507 accuracy: 8.706% epoch: 40 batch: 600 [ 6000/60000] loss: 2.93477535 accuracy: 8.083% epoch: 40 batch: 1200 [ 12000/60000] loss: 2.79218936 accuracy: 8.367% epoch: 40 batch: 1800 [ 18000/60000] loss: 2.94950914 accuracy: 8.678% epoch: 50 batch: 600 [ 6000/60000] loss: 3.06053400 accuracy: 8.317% epoch: 50 batch: 1200 [ 12000/60000] loss: 2.90899801 accuracy: 8.075% epoch: 50 batch: 1800 [ 18000/60000] loss: 2.58354616 accuracy: 8.111% epoch: 60 batch: 600 [ 6000/60000] loss: 2.65577197 accuracy: 8.283% epoch: 60 batch: 1200 [ 12000/60000] loss: 2.62971640 accuracy: 8.650% epoch: 60 batch: 1800 [ 18000/60000] loss: 2.77422786 accuracy: 8.650% Duration: 711 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## accuracy
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('accuracy error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 3 best test accuracy is: 4.679999828338623
train_loader = DataLoader(train_q5, batch_size=10, shuffle=True)
val_loader = DataLoader(val_q5,batch_size=10, shuffle=True)
test_loader = DataLoader(test_q5, batch_size=10, shuffle=False)
torch.manual_seed(42)
cnn_model = ConvolutionalNetwork()
cnn_model
ConvolutionalNetwork( (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1)) (fc1): Linear(in_features=960, out_features=120, bias=True) (fc2): Linear(in_features=120, out_features=84, bias=True) (fc3): Linear(in_features=84, out_features=19, bias=True) )
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.2)
import time
start_time = time.time()
torch.manual_seed(42)
epochs = 70
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train) in enumerate(train_loader):
b+=1
# Apply the model
y_pred = cnn_model(X_train.view(10,1,28,56))
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%600 == 0 and i%10==0:
print(f'epoch: {i:2} batch: {b:4} [{10*b:6}/60000] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_vali, y_vali) in enumerate(val_loader):
# Apply the model
y_val = cnn_model(X_vali.view(10,1,28,56))
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_vali).sum()
loss = criterion(y_val, y_vali)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
epoch: 0 batch: 600 [ 6000/60000] loss: 2.83128095 accuracy: 9.050% epoch: 0 batch: 1200 [ 12000/60000] loss: 2.85656953 accuracy: 8.975% epoch: 0 batch: 1800 [ 18000/60000] loss: 2.95758176 accuracy: 8.900% epoch: 10 batch: 600 [ 6000/60000] loss: 2.58340883 accuracy: 9.250% epoch: 10 batch: 1200 [ 12000/60000] loss: 2.96311903 accuracy: 8.850% epoch: 10 batch: 1800 [ 18000/60000] loss: 2.81125093 accuracy: 8.778% epoch: 20 batch: 600 [ 6000/60000] loss: 2.96598911 accuracy: 8.450% epoch: 20 batch: 1200 [ 12000/60000] loss: 2.77917218 accuracy: 8.625% epoch: 20 batch: 1800 [ 18000/60000] loss: 2.61734557 accuracy: 8.800% epoch: 30 batch: 600 [ 6000/60000] loss: 3.24664259 accuracy: 8.883% epoch: 30 batch: 1200 [ 12000/60000] loss: 3.18627691 accuracy: 9.050% epoch: 30 batch: 1800 [ 18000/60000] loss: 2.80974507 accuracy: 8.706% epoch: 40 batch: 600 [ 6000/60000] loss: 2.93477535 accuracy: 8.083% epoch: 40 batch: 1200 [ 12000/60000] loss: 2.79218936 accuracy: 8.367% epoch: 40 batch: 1800 [ 18000/60000] loss: 2.94950914 accuracy: 8.678% epoch: 50 batch: 600 [ 6000/60000] loss: 3.06053400 accuracy: 8.317% epoch: 50 batch: 1200 [ 12000/60000] loss: 2.90899801 accuracy: 8.075% epoch: 50 batch: 1800 [ 18000/60000] loss: 2.58354616 accuracy: 8.111% epoch: 60 batch: 600 [ 6000/60000] loss: 2.65577197 accuracy: 8.283% epoch: 60 batch: 1200 [ 12000/60000] loss: 2.62971640 accuracy: 8.650% epoch: 60 batch: 1800 [ 18000/60000] loss: 2.77422786 accuracy: 8.650% Duration: 694 seconds
## loss
trl = [loss.item() for loss in train_losses ]
tel = [loss.item() for loss in test_losses ]
figure(figsize=(15, 10), dpi=80)
plt.plot(trl, label='training loss')
plt.plot(tel, label='validation loss')
plt.title('Loss at the end of each epoch')
plt.legend();
## accuracy
figure(figsize=(15, 10), dpi=80)
plt.plot([t/200 for t in train_correct], label='training accuracy')
plt.plot([t/50 for t in test_correct], label='validation accuracy')
plt.title('accuracy error at the end of each epoch')
plt.legend();
test_cc = [t/100 for t in test_correct]
max_index = test_cc.index(max(test_cc))
print(f'epoch with the best reslut: {max_index}')
print(f'best test accuracy is: {test_cc[max_index]}')
epoch with the best reslut: 3 best test accuracy is: 4.679999828338623